diff options
186 files changed, 1931 insertions, 1012 deletions
@@ -2161,6 +2161,19 @@ N: Mike Kravetz D: Maintenance and development of the hugetlb subsystem +N: Seth Jennings +D: Creation and maintenance of zswap + +N: Dan Streetman +D: Maintenance and development of zswap +D: Creation and maintenance of the zpool API + +N: Vitaly Wool +D: Maintenance and development of zswap + N: Andreas S. Krebs D: CYPRESS CY82C693 chipset IDE, Digital's PC-Alpha 164SX boards diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues index 906ff3ca928a..5bff64d256c2 100644 --- a/Documentation/ABI/testing/sysfs-class-net-queues +++ b/Documentation/ABI/testing/sysfs-class-net-queues @@ -1,4 +1,4 @@ -What: /sys/class/<iface>/queues/rx-<queue>/rps_cpus +What: /sys/class/net/<iface>/queues/rx-<queue>/rps_cpus Date: March 2010 KernelVersion: 2.6.35 Contact: [email protected] @@ -8,7 +8,7 @@ Description: network device queue. Possible values depend on the number of available CPU(s) in the system. -What: /sys/class/<iface>/queues/rx-<queue>/rps_flow_cnt +What: /sys/class/net/<iface>/queues/rx-<queue>/rps_flow_cnt Date: April 2010 KernelVersion: 2.6.35 Contact: [email protected] @@ -16,7 +16,7 @@ Description: Number of Receive Packet Steering flows being currently processed by this particular network device receive queue. -What: /sys/class/<iface>/queues/tx-<queue>/tx_timeout +What: /sys/class/net/<iface>/queues/tx-<queue>/tx_timeout Date: November 2011 KernelVersion: 3.3 Contact: [email protected] @@ -24,7 +24,7 @@ Description: Indicates the number of transmit timeout events seen by this network interface transmit queue. -What: /sys/class/<iface>/queues/tx-<queue>/tx_maxrate +What: /sys/class/net/<iface>/queues/tx-<queue>/tx_maxrate Date: March 2015 KernelVersion: 4.1 Contact: [email protected] @@ -32,7 +32,7 @@ Description: A Mbps max-rate set for the queue, a value of zero means disabled, default is disabled. -What: /sys/class/<iface>/queues/tx-<queue>/xps_cpus +What: /sys/class/net/<iface>/queues/tx-<queue>/xps_cpus Date: November 2010 KernelVersion: 2.6.38 Contact: [email protected] @@ -42,7 +42,7 @@ Description: network device transmit queue. Possible values depend on the number of available CPU(s) in the system. -What: /sys/class/<iface>/queues/tx-<queue>/xps_rxqs +What: /sys/class/net/<iface>/queues/tx-<queue>/xps_rxqs Date: June 2018 KernelVersion: 4.18.0 Contact: [email protected] @@ -53,7 +53,7 @@ Description: number of available receive queue(s) in the network device. Default is disabled. -What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time +What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time Date: November 2011 KernelVersion: 3.3 Contact: [email protected] @@ -62,7 +62,7 @@ Description: of this particular network device transmit queue. Default value is 1000. -What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/inflight +What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/inflight Date: November 2011 KernelVersion: 3.3 Contact: [email protected] @@ -70,7 +70,7 @@ Description: Indicates the number of bytes (objects) in flight on this network device transmit queue. -What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit +What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit Date: November 2011 KernelVersion: 3.3 Contact: [email protected] @@ -79,7 +79,7 @@ Description: on this network device transmit queue. This value is clamped to be within the bounds defined by limit_max and limit_min. -What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max +What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max Date: November 2011 KernelVersion: 3.3 Contact: [email protected] @@ -88,7 +88,7 @@ Description: queued on this network device transmit queue. See include/linux/dynamic_queue_limits.h for the default value. -What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min +What: /sys/class/net/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min Date: November 2011 KernelVersion: 3.3 Contact: [email protected] diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst index a9efab50eed8..22955d56b379 100644 --- a/Documentation/dev-tools/kunit/usage.rst +++ b/Documentation/dev-tools/kunit/usage.rst @@ -671,8 +671,23 @@ Testing Static Functions ------------------------ If we do not want to expose functions or variables for testing, one option is to -conditionally ``#include`` the test file at the end of your .c file. For -example: +conditionally export the used symbol. For example: + +.. code-block:: c + + /* In my_file.c */ + + VISIBLE_IF_KUNIT int do_interesting_thing(); + EXPORT_SYMBOL_IF_KUNIT(do_interesting_thing); + + /* In my_file.h */ + + #if IS_ENABLED(CONFIG_KUNIT) + int do_interesting_thing(void); + #endif + +Alternatively, you could conditionally ``#include`` the test file at the end of +your .c file. For example: .. code-block:: c diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 1ad01d52a863..8e4d19adee8c 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -942,6 +942,10 @@ attribute-sets: - name: gro-ipv4-max-size type: u32 + - + name: dpll-pin + type: nest + nested-attributes: link-dpll-pin-attrs - name: af-spec-attrs attributes: @@ -1627,6 +1631,12 @@ attribute-sets: - name: used type: u8 + - + name: link-dpll-pin-attrs + attributes: + - + name: id + type: u32 sub-messages: - diff --git a/MAINTAINERS b/MAINTAINERS index 8999497011a2..722b894f305e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10283,7 +10283,7 @@ F: drivers/scsi/ibmvscsi/ibmvscsi* F: include/scsi/viosrp.h IBM Power Virtual SCSI Device Target Driver -M: Michael Cyr <[email protected]> +M: Tyrel Datwyler <[email protected]> S: Supported @@ -11725,6 +11725,7 @@ F: fs/smb/server/ KERNEL UNIT TESTING FRAMEWORK (KUnit) M: Brendan Higgins <[email protected]> M: David Gow <[email protected]> +R: Rae Moar <[email protected]> S: Maintained @@ -12903,6 +12904,8 @@ M: Alejandro Colomar <[email protected]> S: Maintained W: http://www.kernel.org/doc/man-pages +T: git git://git.kernel.org/pub/scm/docs/man-pages/man-pages.git +T: git git://www.alejandro-colomar.es/src/alx/linux/man-pages/man-pages.git MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP) M: Jeremy Kerr <[email protected]> @@ -15178,6 +15181,7 @@ F: Documentation/networking/net_cachelines/net_device.rst F: drivers/connector/ F: drivers/net/ F: include/dt-bindings/net/ +F: include/linux/cn_proc.h F: include/linux/etherdevice.h F: include/linux/fcdevice.h F: include/linux/fddidevice.h @@ -15185,6 +15189,7 @@ F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h F: include/linux/netdevice.h +F: include/uapi/linux/cn_proc.h F: include/uapi/linux/if_* F: include/uapi/linux/netdevice.h X: drivers/net/wireless/ @@ -18082,7 +18087,6 @@ F: drivers/net/ethernet/qualcomm/emac/ QUALCOMM ETHQOS ETHERNET DRIVER M: Vinod Koul <[email protected]> -R: Bhupesh Sharma <[email protected]> S: Maintained @@ -24341,13 +24345,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/zonefs.git F: Documentation/filesystems/zonefs.rst F: fs/zonefs/ -ZPOOL COMPRESSED PAGE STORAGE API -M: Dan Streetman <[email protected]> -S: Maintained -F: include/linux/zpool.h -F: mm/zpool.c - ZR36067 VIDEO FOR LINUX DRIVER M: Corentin Labbe <[email protected]> @@ -24399,7 +24396,9 @@ M: Nhat Pham <[email protected]> S: Maintained F: Documentation/admin-guide/mm/zswap.rst +F: include/linux/zpool.h F: include/linux/zswap.h +F: mm/zpool.c F: mm/zswap.c THE REST @@ -294,15 +294,15 @@ may-sync-config := 1 single-build := ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) + ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) need-config := - endif + endif endif ifneq ($(filter $(no-sync-config-targets), $(MAKECMDGOALS)),) - ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),) + ifeq ($(filter-out $(no-sync-config-targets), $(MAKECMDGOALS)),) may-sync-config := - endif + endif endif need-compiler := $(may-sync-config) @@ -323,9 +323,9 @@ endif # We cannot build single targets and the others at the same time ifneq ($(filter $(single-targets), $(MAKECMDGOALS)),) single-build := 1 - ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),) + ifneq ($(filter-out $(single-targets), $(MAKECMDGOALS)),) mixed-build := 1 - endif + endif endif # For "make -j clean all", "make -j mrproper defconfig all", etc. @@ -1666,7 +1666,7 @@ help: @echo ' (sparse by default)' @echo ' make C=2 [targets] Force check of all c source with $$CHECK' @echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections' - @echo ' make W=n [targets] Enable extra build checks, n=1,2,3 where' + @echo ' make W=n [targets] Enable extra build checks, n=1,2,3,c,e where' @echo ' 1: warnings which may be relevant and do not occur too often' @echo ' 2: warnings which occur quite often but may still be relevant' @echo ' 3: more obscure warnings, can most likely be ignored' diff --git a/arch/Kconfig b/arch/Kconfig index c91917b50873..a5af0edd3eb8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -673,6 +673,7 @@ config SHADOW_CALL_STACK bool "Shadow Call Stack" depends on ARCH_SUPPORTS_SHADOW_CALL_STACK depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER + depends on MMU help This option enables the compiler's Shadow Call Stack, which uses a shadow stack to protect function return addresses from diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 43e39040d3ac..76ef1a67c361 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -15,10 +15,10 @@ KBUILD_DEFCONFIG := multi_defconfig ifdef cross_compiling - ifeq ($(CROSS_COMPILE),) + ifeq ($(CROSS_COMPILE),) CROSS_COMPILE := $(call cc-cross-prefix, \ m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-) - endif + endif endif # diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index d14ccc948a29..5c845e8d59d9 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -25,7 +25,6 @@ config PARISC select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE select BUG - select BUILDTIME_TABLE_SORT select HAVE_KERNEL_UNCOMPRESSED select HAVE_PCI select HAVE_PERF_EVENTS diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 920db57b6b4c..7486b3b30594 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -50,12 +50,12 @@ export CROSS32CC # Set default cross compiler for kernel build ifdef cross_compiling - ifeq ($(CROSS_COMPILE),) + ifeq ($(CROSS_COMPILE),) CC_SUFFIXES = linux linux-gnu unknown-linux-gnu suse-linux CROSS_COMPILE := $(call cc-cross-prefix, \ $(foreach a,$(CC_ARCHES), \ $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-))) - endif + endif endif ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 74d17d7e759d..5937d5edaba1 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -576,6 +576,7 @@ .section __ex_table,"aw" ! \ .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ + or %r0,%r0,%r0 ! \ .previous diff --git a/arch/parisc/include/asm/extable.h b/arch/parisc/include/asm/extable.h new file mode 100644 index 000000000000..4ea23e3d79dc --- /dev/null +++ b/arch/parisc/include/asm/extable.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PARISC_EXTABLE_H +#define __PARISC_EXTABLE_H + +#include <asm/ptrace.h> +#include <linux/compiler.h> + +/* + * The exception table consists of three addresses: + * + * - A relative address to the instruction that is allowed to fault. + * - A relative address at which the program should continue (fixup routine) + * - An asm statement which specifies which CPU register will + * receive -EFAULT when an exception happens if the lowest bit in + * the fixup address is set. + * + * Note: The register specified in the err_opcode instruction will be + * modified at runtime if a fault happens. Register %r0 will be ignored. + * + * Since relative addresses are used, 32bit values are sufficient even on + * 64bit kernel. + */ + +struct pt_regs; +int fixup_exception(struct pt_regs *regs); + +#define ARCH_HAS_RELATIVE_EXTABLE +struct exception_table_entry { + int insn; /* relative address of insn that is allowed to fault. */ + int fixup; /* relative address of fixup routine */ + int err_opcode; /* sample opcode with register which holds error code */ +}; + +#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr, opcode )\ + ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ + ".word (" #fault_addr " - .), (" #except_addr " - .)\n" \ + opcode "\n" \ + ".previous\n" + +/* + * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry + * (with lowest bit set) for which the fault handler in fixup_exception() will + * load -EFAULT on fault into the register specified by the err_opcode instruction, + * and zeroes the target register in case of a read fault in get_user(). + */ +#define ASM_EXCEPTIONTABLE_VAR(__err_var) \ + int __err_var = 0 +#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr, register )\ + ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1, "or %%r0,%%r0," register) + +static inline void swap_ex_entry_fixup(struct exception_table_entry *a, + struct exception_table_entry *b, + struct exception_table_entry tmp, + int delta) +{ + a->fixup = b->fixup + delta; + b->fixup = tmp.fixup - delta; + a->err_opcode = b->err_opcode; + b->err_opcode = tmp.err_opcode; +} +#define swap_ex_entry_fixup swap_ex_entry_fixup + +#endif diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index c822bd0c0e3c..51f40eaf7780 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -8,7 +8,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ @@ -22,7 +23,8 @@ "copy %%r0,%0\n" \ "8:\tlpa %%r0(%%sr3,%1),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b, \ + "or %%r0,%%r0,%%r0") \ : "=&r" (pa) \ : "r" (va) \ : "memory" \ diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 4165079898d9..88d0ae5769dd 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -7,6 +7,7 @@ */ #include <asm/page.h> #include <asm/cache.h> +#include <asm/extable.h> #include <linux/bug.h> #include <linux/string.h> @@ -26,37 +27,6 @@ #define STD_USER(sr, x, ptr) __put_user_asm(sr, "std", x, ptr) #endif -/* - * The exception table contains two values: the first is the relative offset to - * the address of the instruction that is allowed to fault, and the second is - * the relative offset to the address of the fixup routine. Since relative - * addresses are used, 32bit values are sufficient even on 64bit kernel. - */ - -#define ARCH_HAS_RELATIVE_EXTABLE -struct exception_table_entry { - int insn; /* relative address of insn that is allowed to fault. */ - int fixup; /* relative address of fixup routine */ -}; - -#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ - ".section __ex_table,\"aw\"\n" \ - ".align 4\n" \ - ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ - ".previous\n" - -/* - * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() creates a special exception table entry - * (with lowest bit set) for which the fault handler in fixup_exception() will - * load -EFAULT into %r29 for a read or write fault, and zeroes the target - * register in case of a read fault in get_user(). - */ -#define ASM_EXCEPTIONTABLE_REG 29 -#define ASM_EXCEPTIONTABLE_VAR(__variable) \ - register long __variable __asm__ ("r29") = 0 -#define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\ - ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1) - #define __get_user_internal(sr, val, ptr) \ ({ \ ASM_EXCEPTIONTABLE_VAR(__gu_err); \ @@ -83,7 +53,7 @@ struct exception_table_entry { \ __asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ : "=r"(__gu_val), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -115,8 +85,8 @@ struct exception_table_entry { "1: ldw 0(%%sr%2,%3),%0\n" \ "2: ldw 4(%%sr%2,%3),%R0\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%1") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%1") \ : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ : "i"(sr), "r"(ptr)); \ \ @@ -174,7 +144,7 @@ struct exception_table_entry { __asm__ __volatile__ ( \ "1: " stx " %1,0(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(x), "i"(sr), "r"(ptr)) @@ -186,15 +156,14 @@ struct exception_table_entry { "1: stw %1,0(%%sr%2,%3)\n" \ "2: stw %R1,4(%%sr%2,%3)\n" \ "9:\n" \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b, "%0") \ + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b, "%0") \ : "+r"(__pu_err) \ : "r"(__val), "i"(sr), "r"(ptr)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ - /* * Complex access routines -- external declarations */ @@ -216,7 +185,4 @@ unsigned long __must_check raw_copy_from_user(void *dst, const void __user *src, #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER -struct pt_regs; -int fixup_exception(struct pt_regs *regs); - #endif /* __PARISC_UACCESS_H */ diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 268d90a9325b..422f3e1e6d9c 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -58,7 +58,7 @@ int pa_serialize_tlb_flushes __ro_after_init; struct pdc_cache_info cache_info __ro_after_init; #ifndef CONFIG_PA20 -struct pdc_btlb_info btlb_info __ro_after_init; +struct pdc_btlb_info btlb_info; #endif DEFINE_STATIC_KEY_TRUE(parisc_has_cache); @@ -264,6 +264,10 @@ parisc_cache_init(void) icache_stride = CAFL_STRIDE(cache_info.ic_conf); #undef CAFL_STRIDE + /* stride needs to be non-zero, otherwise cache flushes will not work */ + WARN_ON(cache_info.dc_size && dcache_stride == 0); + WARN_ON(cache_info.ic_size && icache_stride == 0); + if ((boot_cpu_data.pdc.capabilities & PDC_MODEL_NVA_MASK) == PDC_MODEL_NVA_UNSUPPORTED) { printk(KERN_WARNING "parisc_cache_init: Only equivalent aliasing supported!\n"); @@ -850,7 +854,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, #endif " fic,m %3(%4,%0)\n" "2: sync\n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1") : "+r" (start), "+r" (error) : "r" (end), "r" (dcache_stride), "i" (SR_USER)); } @@ -865,7 +869,7 @@ SYSCALL_DEFINE3(cacheflush, unsigned long, addr, unsigned long, bytes, #endif " fdc,m %3(%4,%0)\n" "2: sync\n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 2b, "%1") : "+r" (start), "+r" (error) : "r" (end), "r" (icache_stride), "i" (SR_USER)); } diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 25f9b9e9d6df..c7ff339732ba 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -742,7 +742,7 @@ parse_tree_node(struct device *parent, int index, struct hardware_path *modpath) }; if (device_for_each_child(parent, &recurse_data, descend_children)) - { /* nothing */ }; + { /* nothing */ } return d.dev; } @@ -1004,6 +1004,9 @@ static __init int qemu_print_iodc_data(struct device *lin_dev, void *data) pr_info("\n"); + /* Prevent hung task messages when printing on serial console */ + cond_resched(); + pr_info("#define HPA_%08lx_DESCRIPTION \"%s\"\n", hpa, parisc_hardware_description(&dev->id)); diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index ce25acfe4889..c520e551a165 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -120,8 +120,8 @@ static int emulate_ldh(struct pt_regs *regs, int toreg) "2: ldbs 1(%%sr1,%3), %0\n" " depw %2, 23, 24, %0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1) : "r" (saddr), "r" (regs->isr) ); @@ -152,8 +152,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) " mtctl %2,11\n" " vshd %0,%3,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2) : "r" (saddr), "r" (regs->isr) ); @@ -189,8 +189,8 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " mtsar %%r19\n" " shrpd %0,%%r20,%%sar,%0\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") : "=r" (val), "+r" (ret) : "0" (val), "r" (saddr), "r" (regs->isr) : "r19", "r20" ); @@ -209,9 +209,9 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " vshd %0,%R0,%0\n" " vshd %R0,%4,%R0\n" "4: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b, "%1") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) : "r" (regs->isr) ); } @@ -244,8 +244,8 @@ static int emulate_sth(struct pt_regs *regs, int frreg) "1: stb %1, 0(%%sr1, %3)\n" "2: stb %2, 1(%%sr1, %3)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret), "=&r" (temp1) : "r" (val), "r" (regs->ior), "r" (regs->isr) ); @@ -285,8 +285,8 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) " stw %%r20,0(%%sr1,%2)\n" " stw %%r21,4(%%sr1,%2)\n" "3: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -329,10 +329,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "3: std %%r20,0(%%sr1,%2)\n" "4: std %%r21,8(%%sr1,%2)\n" "5: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1" ); @@ -357,11 +357,11 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "4: stw %%r1,4(%%sr1,%2)\n" "5: stw %R1,8(%%sr1,%2)\n" "6: \n" - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b) - ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b, "%0") + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b, "%0") : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r1" ); diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 548051b0b4af..b445e47903cf 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -127,7 +127,7 @@ SECTIONS } #endif - RO_DATA(8) + RO_DATA(PAGE_SIZE) /* unwind info */ . = ALIGN(4); diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 2fe5b44986e0..c39de84e98b0 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -150,11 +150,16 @@ int fixup_exception(struct pt_regs *regs) * Fix up get_user() and put_user(). * ASM_EXCEPTIONTABLE_ENTRY_EFAULT() sets the least-significant * bit in the relative address of the fixup routine to indicate - * that gr[ASM_EXCEPTIONTABLE_REG] should be loaded with - * -EFAULT to report a userspace access error. + * that the register encoded in the "or %r0,%r0,register" + * opcode should be loaded with -EFAULT to report a userspace + * access error. */ if (fix->fixup & 1) { - regs->gr[ASM_EXCEPTIONTABLE_REG] = -EFAULT; + int fault_error_reg = fix->err_opcode & 0x1f; + if (!WARN_ON(!fault_error_reg)) + regs->gr[fault_error_reg] = -EFAULT; + pr_debug("Unalignment fixup of register %d at %pS\n", + fault_error_reg, (void*)regs->iaoq[0]); /* zero target register for get_user() */ if (parisc_acctyp(0, regs->iir) == VM_READ) { diff --git a/arch/um/Makefile b/arch/um/Makefile index 82f05f250634..34957dcb88b9 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -115,7 +115,9 @@ archprepare: $(Q)$(MAKE) $(build)=$(HOST_DIR)/um include/generated/user_constants.h LINK-$(CONFIG_LD_SCRIPT_STATIC) += -static -LINK-$(CONFIG_LD_SCRIPT_DYN) += $(call cc-option, -no-pie) +ifdef CONFIG_LD_SCRIPT_DYN +LINK-$(call gcc-min-version, 60100)$(CONFIG_CC_IS_CLANG) += -no-pie +endif LINK-$(CONFIG_LD_SCRIPT_DYN_RPATH) += -Wl,-rpath,/lib CFLAGS_NO_HARDENING := $(call cc-option, -fno-PIC,) $(call cc-option, -fno-pic,) \ diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 1a068de12a56..2264db14a25d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -112,13 +112,13 @@ ifeq ($(CONFIG_X86_32),y) # temporary until string.h is fixed KBUILD_CFLAGS += -ffreestanding - ifeq ($(CONFIG_STACKPROTECTOR),y) - ifeq ($(CONFIG_SMP),y) + ifeq ($(CONFIG_STACKPROTECTOR),y) + ifeq ($(CONFIG_SMP),y) KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard - else + else KBUILD_CFLAGS += -mstack-protector-guard=global - endif - endif + endif + endif else BITS := 64 UTS_MACHINE := x86_64 diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h index 8fa6ac0e2d76..d91b37f5b4bb 100644 --- a/arch/x86/include/asm/kmsan.h +++ b/arch/x86/include/asm/kmsan.h @@ -64,6 +64,7 @@ static inline bool kmsan_virt_addr_valid(void *addr) { unsigned long x = (unsigned long)addr; unsigned long y = x - __START_KERNEL_map; + bool ret; /* use the carry flag to determine if x was < __START_KERNEL_map */ if (unlikely(x > y)) { @@ -79,7 +80,21 @@ static inline bool kmsan_virt_addr_valid(void *addr) return false; } - return pfn_valid(x >> PAGE_SHIFT); + /* + * pfn_valid() relies on RCU, and may call into the scheduler on exiting + * the critical section. However, this would result in recursion with + * KMSAN. Therefore, disable preemption here, and re-enable preemption + * below while suppressing reschedules to avoid recursion. + * + * Note, this sacrifices occasionally breaking scheduling guarantees. + * Although, a kernel compiled with KMSAN has already given up on any + * performance guarantees due to being heavily instrumented. + */ + preempt_disable(); + ret = pfn_valid(x >> PAGE_SHIFT); + preempt_enable_no_resched(); + + return ret; } #endif /* !MODULE */ diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index a148ff1f0872..a4f6884416a0 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -4545,6 +4545,7 @@ struct caam_hash_alg { struct list_head entry; struct device *dev; int alg_type; + bool is_hmac; struct ahash_alg ahash_alg; }; @@ -4571,7 +4572,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) ctx->dev = caam_hash->dev; - if (alg->setkey) { + if (caam_hash->is_hmac) { ctx->adata.key_dma = dma_map_single_attrs(ctx->dev, ctx->key, ARRAY_SIZE(ctx->key), DMA_TO_DEVICE, @@ -4611,7 +4612,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) * For keyed hash algorithms shared descriptors * will be created later in setkey() callback */ - return alg->setkey ? 0 : ahash_set_sh_desc(ahash); + return caam_hash->is_hmac ? 0 : ahash_set_sh_desc(ahash); } static void caam_hash_cra_exit(struct crypto_tfm *tfm) @@ -4646,12 +4647,14 @@ static struct caam_hash_alg *caam_hash_alloc(struct device *dev, template->hmac_name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->hmac_driver_name); + t_alg->is_hmac = true; } else { snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); t_alg->ahash_alg.setkey = NULL; + t_alg->is_hmac = false; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 290c8500c247..fdd724228c2f 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -1753,6 +1753,7 @@ static struct caam_hash_template driver_hash[] = { struct caam_hash_alg { struct list_head entry; int alg_type; + bool is_hmac; struct ahash_engine_alg ahash_alg; }; @@ -1804,7 +1805,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) } else { if (priv->era >= 6) { ctx->dir = DMA_BIDIRECTIONAL; - ctx->key_dir = alg->setkey ? DMA_TO_DEVICE : DMA_NONE; + ctx->key_dir = caam_hash->is_hmac ? DMA_TO_DEVICE : DMA_NONE; } else { ctx->dir = DMA_TO_DEVICE; ctx->key_dir = DMA_NONE; @@ -1862,7 +1863,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) * For keyed hash algorithms shared descriptors * will be created later in setkey() callback */ - return alg->setkey ? 0 : ahash_set_sh_desc(ahash); + return caam_hash->is_hmac ? 0 : ahash_set_sh_desc(ahash); } static void caam_hash_cra_exit(struct crypto_tfm *tfm) @@ -1915,12 +1916,14 @@ caam_hash_alloc(struct caam_hash_template *template, template->hmac_name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->hmac_driver_name); + t_alg->is_hmac = true; } else { snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); halg->setkey = NULL; + t_alg->is_hmac = false; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index 479062aa5e6b..94a0ebb03d8c 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -463,6 +463,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->fw_name = ADF_402XX_FW; hw_data->fw_mmp_name = ADF_402XX_MMP; hw_data->uof_get_name = uof_get_name_402xx; + hw_data->get_ena_thd_mask = get_ena_thd_mask; break; case ADF_401XX_PCI_DEVICE_ID: hw_data->fw_name = ADF_4XXX_FW; diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 0547253d16fe..7d3346b3a2bf 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -118,10 +118,9 @@ static int textual_leaf_to_string(const u32 *block, char *buf, size_t size) * @buf: where to put the string * @size: size of @buf, in bytes * - * The string is taken from a minimal ASCII text descriptor leaf after - * the immediate entry with @key. The string is zero-terminated. - * An overlong string is silently truncated such that it and the - * zero byte fit into @size. + * The string is taken from a minimal ASCII text descriptor leaf just after the entry with the + * @key. The string is zero-terminated. An overlong string is silently truncated such that it + * and the zero byte fit into @size. * * Returns strlen(buf) or a negative error code. */ @@ -368,8 +367,17 @@ static ssize_t show_text_leaf(struct device *dev, for (i = 0; i < ARRAY_SIZE(directories) && !!directories[i]; ++i) { int result = fw_csr_string(directories[i], attr->key, buf, bufsize); // Detected. - if (result >= 0) + if (result >= 0) { ret = result; + } else if (i == 0 && attr->key == CSR_VENDOR) { + // Sony DVMC-DA1 has configuration ROM such that the descriptor leaf entry + // in the root directory follows to the directory entry for vendor ID + // instead of the immediate value for vendor ID. + result = fw_csr_string(directories[i], CSR_DIRECTORY | attr->key, buf, + bufsize); + if (result >= 0) + ret = result; + } } if (ret >= 0) { diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index d9ef45fcaeab..470ae2c29c94 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -143,6 +143,9 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *s } EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup); +/* Disables missing prototype warnings */ +__bpf_kfunc_start_defs(); + /** * hid_bpf_get_data - Get the kernel memory pointer associated with the context @ctx * @@ -152,7 +155,7 @@ EXPORT_SYMBOL_GPL(call_hid_bpf_rdesc_fixup); * * @returns %NULL on error, an %__u8 memory pointer on success */ -noinline __u8 * +__bpf_kfunc __u8 * hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr_buf_size) { struct hid_bpf_ctx_kern *ctx_kern; @@ -167,6 +170,7 @@ hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr return ctx_kern->data + offset; } +__bpf_kfunc_end_defs(); /* * The following set contains all functions we agree BPF programs @@ -241,6 +245,42 @@ int hid_bpf_reconnect(struct hid_device *hdev) return 0; } +static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct bpf_prog *prog, + __u32 flags) +{ + int fd, err, prog_type; + + prog_type = hid_bpf_get_prog_attach_type(prog); + if (prog_type < 0) + return prog_type; + + if (prog_type >= HID_BPF_PROG_TYPE_MAX) + return -EINVAL; + + if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) { + err = hid_bpf_allocate_event_data(hdev); + if (err) + return err; + } + + fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, prog, flags); + if (fd < 0) + return fd; + + if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) { + err = hid_bpf_reconnect(hdev); + if (err) { + close_fd(fd); + return err; + } + } + + return fd; +} + +/* Disables missing prototype warnings */ +__bpf_kfunc_start_defs(); + /** * hid_bpf_attach_prog - Attach the given @prog_fd to the given HID device * @@ -253,22 +293,17 @@ int hid_bpf_reconnect(struct hid_device *hdev) * is pinned to the BPF file system). */ /* called from syscall */ -noinline int +__bpf_kfunc int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) { struct hid_device *hdev; + struct bpf_prog *prog; struct device *dev; - int fd, err, prog_type = hid_bpf_get_prog_attach_type(prog_fd); + int err, fd; if (!hid_bpf_ops) return -EINVAL; - if (prog_type < 0) - return prog_type; - - if (prog_type >= HID_BPF_PROG_TYPE_MAX) - return -EINVAL; - if ((flags & ~HID_BPF_FLAG_MASK)) return -EINVAL; @@ -278,25 +313,29 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) hdev = to_hid_device(dev); - if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) { - err = hid_bpf_allocate_event_data(hdev); - if (err) - return err; + /* + * take a ref on the prog itself, it will be released + * on errors or when it'll be detached + */ + prog = bpf_prog_get(prog_fd); + if (IS_ERR(prog)) { + err = PTR_ERR(prog); + goto out_dev_put; } - fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, flags); - if (fd < 0) - return fd; - - if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) { - err = hid_bpf_reconnect(hdev); - if (err) { - close_fd(fd); - return err; - } + fd = do_hid_bpf_attach_prog(hdev, prog_fd, prog, flags); + if (fd < 0) { + err = fd; + goto out_prog_put; } return fd; + + out_prog_put: + bpf_prog_put(prog); + out_dev_put: + put_device(dev); + return err; } /** @@ -306,7 +345,7 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) * * @returns A pointer to &struct hid_bpf_ctx on success, %NULL on error. */ -noinline struct hid_bpf_ctx * +__bpf_kfunc struct hid_bpf_ctx * hid_bpf_allocate_context(unsigned int hid_id) { struct hid_device *hdev; @@ -323,8 +362,10 @@ hid_bpf_allocate_context(unsigned int hid_id) hdev = to_hid_device(dev); ctx_kern = kzalloc(sizeof(*ctx_kern), GFP_KERNEL); - if (!ctx_kern) + if (!ctx_kern) { + put_device(dev); return NULL; + } ctx_kern->ctx.hid = hdev; @@ -337,14 +378,19 @@ hid_bpf_allocate_context(unsigned int hid_id) * @ctx: the HID-BPF context to release * */ -noinline void +__bpf_kfunc void hid_bpf_release_context(struct hid_bpf_ctx *ctx) { struct hid_bpf_ctx_kern *ctx_kern; + struct hid_device *hid; ctx_kern = container_of(ctx, struct hid_bpf_ctx_kern, ctx); + hid = (struct hid_device *)ctx_kern->ctx.hid; /* ignore const */ kfree(ctx_kern); + + /* get_device() is called by bus_find_device() */ + put_device(&hid->dev); } /** @@ -358,7 +404,7 @@ hid_bpf_release_context(struct hid_bpf_ctx *ctx) * * @returns %0 on success, a negative error code otherwise. */ -noinline int +__bpf_kfunc int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, enum hid_report_type rtype, enum hid_class_request reqtype) { @@ -426,6 +472,7 @@ hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, kfree(dma_data); return ret; } +__bpf_kfunc_end_defs(); /* our HID-BPF entrypoints */ BTF_SET8_START(hid_bpf_fmodret_ids) diff --git a/drivers/hid/bpf/hid_bpf_dispatch.h b/drivers/hid/bpf/hid_bpf_dispatch.h index 63dfc8605cd2..fbe0639d09f2 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.h +++ b/drivers/hid/bpf/hid_bpf_dispatch.h @@ -12,9 +12,9 @@ struct hid_bpf_ctx_kern { int hid_bpf_preload_skel(void); void hid_bpf_free_links_and_skel(void); -int hid_bpf_get_prog_attach_type(int prog_fd); +int hid_bpf_get_prog_attach_type(struct bpf_prog *prog); int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, int prog_fd, - __u32 flags); + struct bpf_prog *prog, __u32 flags); void __hid_bpf_destroy_device(struct hid_device *hdev); int hid_bpf_prog_run(struct hid_device *hdev, enum hid_bpf_prog_type type, struct hid_bpf_ctx_kern *ctx_kern); diff --git a/drivers/hid/bpf/hid_bpf_jmp_table.c b/drivers/hid/bpf/hid_bpf_jmp_table.c index eca34b7372f9..aa8e1c79cdf5 100644 --- a/drivers/hid/bpf/hid_bpf_jmp_table.c +++ b/drivers/hid/bpf/hid_bpf_jmp_table.c @@ -196,6 +196,7 @@ static void __hid_bpf_do_release_prog(int map_fd, unsigned int idx) static void hid_bpf_release_progs(struct work_struct *work) { int i, j, n, map_fd = -1; + bool hdev_destroyed; if (!jmp_table.map) return; @@ -220,6 +221,12 @@ static void hid_bpf_release_progs(struct work_struct *work) if (entry->hdev) { hdev = entry->hdev; type = entry->type; + /* + * hdev is still valid, even if we are called after hid_destroy_device(): + * when hid_bpf_attach() gets called, it takes a ref on the dev through + * bus_find_device() + */ + hdev_destroyed = hdev->bpf.destroyed; hid_bpf_populate_hdev(hdev, type); @@ -232,12 +239,19 @@ static void hid_bpf_release_progs(struct work_struct *work) if (test_bit(next->idx, jmp_table.enabled)) continue; - if (next->hdev == hdev && next->type == type) + if (next->hdev == hdev && next->type == type) { + /* + * clear the hdev reference and decrement the device ref + * that was taken during bus_find_device() while calling + * hid_bpf_attach() + */ next->hdev = NULL; + put_device(&hdev->dev); + } } - /* if type was rdesc fixup, reconnect device */ - if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP) + /* if type was rdesc fixup and the device is not gone, reconnect device */ + if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP && !hdev_destroyed) hid_bpf_reconnect(hdev); } } @@ -333,15 +347,10 @@ static int hid_bpf_insert_prog(int prog_fd, struct bpf_prog *prog) return err; } -int hid_bpf_get_prog_attach_type(int prog_fd) +int hid_bpf_get_prog_attach_type(struct bpf_prog *prog) { - struct bpf_prog *prog = NULL; - int i; int prog_type = HID_BPF_PROG_TYPE_UNDEF; - - prog = bpf_prog_get(prog_fd); - if (IS_ERR(prog)) - return PTR_ERR(prog); + int i; for (i = 0; i < HID_BPF_PROG_TYPE_MAX; i++) { if (hid_bpf_btf_ids[i] == prog->aux->attach_btf_id) { @@ -350,8 +359,6 @@ int hid_bpf_get_prog_attach_type(int prog_fd) } } - bpf_prog_put(prog); - return prog_type; } @@ -388,19 +395,13 @@ static const struct bpf_link_ops hid_bpf_link_lops = { /* called from syscall */ noinline int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, - int prog_fd, __u32 flags) + int prog_fd, struct bpf_prog *prog, __u32 flags) { struct bpf_link_primer link_primer; struct hid_bpf_link *link; - struct bpf_prog *prog = NULL; struct hid_bpf_prog_entry *prog_entry; int cnt, err = -EINVAL, prog_table_idx = -1; - /* take a ref on the prog itself */ - prog = bpf_prog_get(prog_fd); - if (IS_ERR(prog)) - return PTR_ERR(prog); - mutex_lock(&hid_bpf_attach_lock); link = kzalloc(sizeof(*link), GFP_USER); @@ -467,7 +468,6 @@ __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, err_unlock: mutex_unlock(&hid_bpf_attach_lock); - bpf_prog_put(prog); kfree(link); return err; diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index fb30e228d35f..828a5c022c64 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -298,6 +298,9 @@ #define USB_VENDOR_ID_CIDC 0x1677 +#define I2C_VENDOR_ID_CIRQUE 0x0488 +#define I2C_PRODUCT_ID_CIRQUE_1063 0x1063 + #define USB_VENDOR_ID_CJTOUCH 0x24b8 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0020 0x0020 #define USB_DEVICE_ID_CJTOUCH_MULTI_TOUCH_0040 0x0040 diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index fd6d8f1d9b8f..6ef0c88e3e60 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4610,6 +4610,8 @@ static const struct hid_device_id hidpp_devices[] = { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC088) }, { /* Logitech G Pro X Superlight Gaming Mouse over USB */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC094) }, + { /* Logitech G Pro X Superlight 2 Gaming Mouse over USB */ + HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC09b) }, { /* G935 Gaming Headset */ HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0x0a87), diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c index 82d0a77359c4..58b15750dbb0 100644 --- a/drivers/hid/hid-nvidia-shield.c +++ b/drivers/hid/hid-nvidia-shield.c @@ -800,6 +800,8 @@ static inline int thunderstrike_led_create(struct thunderstrike *ts) led->name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike%d:blue:led", ts->id); + if (!led->name) + return -ENOMEM; led->max_brightness = 1; led->flags = LED_CORE_SUSPENDRESUME | LED_RETAIN_AT_SHUTDOWN; led->brightness_get = &thunderstrike_led_get_brightness; @@ -831,6 +833,8 @@ static inline int thunderstrike_psy_create(struct shield_device *shield_dev) shield_dev->battery_dev.desc.name = devm_kasprintf(&ts->base.hdev->dev, GFP_KERNEL, "thunderstrike_%d", ts->id); + if (!shield_dev->battery_dev.desc.name) + return -ENOMEM; shield_dev->battery_dev.psy = power_supply_register( &hdev->dev, &shield_dev->battery_dev.desc, &psy_cfg); diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index b3c4e50e248a..b08a5ab58528 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -1109,10 +1109,9 @@ static int steam_probe(struct hid_device *hdev, return hid_hw_start(hdev, HID_CONNECT_DEFAULT); steam = devm_kzalloc(&hdev->dev, sizeof(*steam), GFP_KERNEL); - if (!steam) { - ret = -ENOMEM; - goto steam_alloc_fail; - } + if (!steam) + return -ENOMEM; + steam->hdev = hdev; hid_set_drvdata(hdev, steam); spin_lock_init(&steam->lock); @@ -1129,14 +1128,14 @@ static int steam_probe(struct hid_device *hdev, */ ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_HIDRAW); if (ret) - goto hid_hw_start_fail; + goto err_cancel_work; ret = hid_hw_open(hdev); if (ret) { hid_err(hdev, "%s:hid_hw_open\n", __func__); - goto hid_hw_open_fail; + goto err_hw_stop; } if (steam->quirks & STEAM_QUIRK_WIRELESS) { @@ -1152,36 +1151,37 @@ static int steam_probe(struct hid_device *hdev, hid_err(hdev, "%s:steam_register failed with error %d\n", __func__, ret); - goto input_register_fail; + goto err_hw_close; } } steam->client_hdev = steam_create_client_hid(hdev); if (IS_ERR(steam->client_hdev)) { ret = PTR_ERR(steam->client_hdev); - goto client_hdev_fail; + goto err_stream_unregister; } steam->client_hdev->driver_data = steam; ret = hid_add_device(steam->client_hdev); if (ret) - goto client_hdev_add_fail; + goto err_destroy; return 0; -client_hdev_add_fail: - hid_hw_stop(hdev); -client_hdev_fail: +err_destroy: hid_destroy_device(steam->client_hdev); -input_register_fail: -hid_hw_open_fail: -hid_hw_start_fail: +err_stream_unregister: + if (steam->connected) + steam_unregister(steam); +err_hw_close: + hid_hw_close(hdev); +err_hw_stop: + hid_hw_stop(hdev); +err_cancel_work: cancel_work_sync(&steam->work_connect); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->rumble_work); -steam_alloc_fail: - hid_err(hdev, "%s: failed with error %d\n", - __func__, ret); + return ret; } diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 13c8dd8cd350..2bc762d31ac7 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -357,8 +357,11 @@ static int hidraw_release(struct inode * inode, struct file * file) down_write(&minors_rwsem); spin_lock_irqsave(&hidraw_table[minor]->list_lock, flags); - for (int i = list->tail; i < list->head; i++) - kfree(list->buffer[i].value); + while (list->tail != list->head) { + kfree(list->buffer[list->tail].value); + list->buffer[list->tail].value = NULL; + list->tail = (list->tail + 1) & (HIDRAW_BUFFER_SIZE - 1); + } list_del(&list->node); spin_unlock_irqrestore(&hidraw_table[minor]->list_lock, flags); kfree(list); diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 90f316ae9819..2df1ab3c31cc 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -49,6 +49,7 @@ #define I2C_HID_QUIRK_RESET_ON_RESUME BIT(2) #define I2C_HID_QUIRK_BAD_INPUT_SIZE BIT(3) #define I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET BIT(4) +#define I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND BIT(5) /* Command opcodes */ #define I2C_HID_OPCODE_RESET 0x01 @@ -131,6 +132,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_RESET_ON_RESUME }, { USB_VENDOR_ID_ITE, I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720, I2C_HID_QUIRK_BAD_INPUT_SIZE }, + { I2C_VENDOR_ID_CIRQUE, I2C_PRODUCT_ID_CIRQUE_1063, + I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND }, /* * Sending the wakeup after reset actually break ELAN touchscreen controller */ @@ -956,7 +959,8 @@ static int i2c_hid_core_suspend(struct i2c_hid *ihid, bool force_poweroff) return ret; /* Save some power */ - i2c_hid_set_power(ihid, I2C_HID_PWR_SLEEP); + if (!(ihid->quirks & I2C_HID_QUIRK_NO_SLEEP_ON_SUSPEND)) + i2c_hid_set_power(ihid, I2C_HID_PWR_SLEEP); disable_irq(client->irq); diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c index c4e1fa0273c8..8be4d576da77 100644 --- a/drivers/hid/i2c-hid/i2c-hid-of.c +++ b/drivers/hid/i2c-hid/i2c-hid-of.c @@ -87,6 +87,7 @@ static int i2c_hid_of_probe(struct i2c_client *client) if (!ihid_of) return -ENOMEM; + ihid_of->client = client; ihid_of->ops.power_up = i2c_hid_of_power_up; ihid_of->ops.power_down = i2c_hid_of_power_down; diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 391c4dbdff42..3c1f657593a8 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2838,8 +2838,7 @@ static void mt753x_phylink_mac_link_up(struct dsa_switch *ds, int port, /* MT753x MAC works in 1G full duplex mode for all up-clocked * variants. */ - if (interface == PHY_INTERFACE_MODE_INTERNAL || - interface == PHY_INTERFACE_MODE_TRGMII || + if (interface == PHY_INTERFACE_MODE_TRGMII || (phy_interface_mode_is_8023z(interface))) { speed = SPEED_1000; duplex = DUPLEX_FULL; diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 383b3c4d6f59..614cabb5c1b0 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3659,7 +3659,7 @@ static int mv88e6xxx_mdio_read_c45(struct mii_bus *bus, int phy, int devad, int err; if (!chip->info->ops->phy_read_c45) - return -EOPNOTSUPP; + return 0xffff; mv88e6xxx_reg_lock(chip); err = chip->info->ops->phy_read_c45(chip, bus, phy, devad, reg, &val); diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index c51f40960961..7a864329cb72 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -2051,12 +2051,11 @@ qca8k_sw_probe(struct mdio_device *mdiodev) priv->info = of_device_get_match_data(priv->dev); priv->reset_gpio = devm_gpiod_get_optional(priv->dev, "reset", - GPIOD_ASIS); + GPIOD_OUT_HIGH); if (IS_ERR(priv->reset_gpio)) return PTR_ERR(priv->reset_gpio); if (priv->reset_gpio) { - gpiod_set_value_cansleep(priv->reset_gpio, 1); /* The active low duration must be greater than 10 ms * and checkpatch.pl wants 20 ms. */ diff --git a/drivers/net/ethernet/amd/pds_core/adminq.c b/drivers/net/ethernet/amd/pds_core/adminq.c index 5beadabc2136..ea773cfa0af6 100644 --- a/drivers/net/ethernet/amd/pds_core/adminq.c +++ b/drivers/net/ethernet/amd/pds_core/adminq.c @@ -63,6 +63,15 @@ static int pdsc_process_notifyq(struct pdsc_qcq *qcq) return nq_work; } +static bool pdsc_adminq_inc_if_up(struct pdsc *pdsc) +{ + if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER) || + pdsc->state & BIT_ULL(PDSC_S_FW_DEAD)) + return false; + + return refcount_inc_not_zero(&pdsc->adminq_refcnt); +} + void pdsc_process_adminq(struct pdsc_qcq *qcq) { union pds_core_adminq_comp *comp; @@ -75,9 +84,9 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq) int aq_work = 0; int credits; - /* Don't process AdminQ when shutting down */ - if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) { - dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n", + /* Don't process AdminQ when it's not up */ + if (!pdsc_adminq_inc_if_up(pdsc)) { + dev_err(pdsc->dev, "%s: called while adminq is unavailable\n", __func__); return; } @@ -124,6 +133,7 @@ credits: pds_core_intr_credits(&pdsc->intr_ctrl[qcq->intx], credits, PDS_CORE_INTR_CRED_REARM); + refcount_dec(&pdsc->adminq_refcnt); } void pdsc_work_thread(struct work_struct *work) @@ -135,18 +145,20 @@ void pdsc_work_thread(struct work_struct *work) irqreturn_t pdsc_adminq_isr(int irq, void *data) { - struct pdsc_qcq *qcq = data; - struct pdsc *pdsc = qcq->pdsc; + struct pdsc *pdsc = data; + struct pdsc_qcq *qcq; - /* Don't process AdminQ when shutting down */ - if (pdsc->state & BIT_ULL(PDSC_S_STOPPING_DRIVER)) { - dev_err(pdsc->dev, "%s: called while PDSC_S_STOPPING_DRIVER\n", + /* Don't process AdminQ when it's not up */ + if (!pdsc_adminq_inc_if_up(pdsc)) { + dev_err(pdsc->dev, "%s: called while adminq is unavailable\n", __func__); return IRQ_HANDLED; } + qcq = &pdsc->adminqcq; queue_work(pdsc->wq, &qcq->work); pds_core_intr_mask(&pdsc->intr_ctrl[qcq->intx], PDS_CORE_INTR_MASK_CLEAR); + refcount_dec(&pdsc->adminq_refcnt); return IRQ_HANDLED; } @@ -179,10 +191,16 @@ static int __pdsc_adminq_post(struct pdsc *pdsc, /* Check that the FW is running */ if (!pdsc_is_fw_running(pdsc)) { - u8 fw_status = ioread8(&pdsc->info_regs->fw_status); - - dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n", - __func__, fw_status); + if (pdsc->info_regs) { + u8 fw_status = + ioread8(&pdsc->info_regs->fw_status); + + dev_info(pdsc->dev, "%s: post failed - fw not running %#02x:\n", + __func__, fw_status); + } else { + dev_info(pdsc->dev, "%s: post failed - BARs not setup\n", + __func__); + } ret = -ENXIO; goto err_out_unlock; @@ -230,6 +248,12 @@ int pdsc_adminq_post(struct pdsc *pdsc, int err = 0; int index; + if (!pdsc_adminq_inc_if_up(pdsc)) { + dev_dbg(pdsc->dev, "%s: preventing adminq cmd %u\n", + __func__, cmd->opcode); + return -ENXIO; + } + wc.qcq = &pdsc->adminqcq; index = __pdsc_adminq_post(pdsc, &pdsc->adminqcq, cmd, comp, &wc); if (index < 0) { @@ -248,10 +272,16 @@ int pdsc_adminq_post(struct pdsc *pdsc, break; if (!pdsc_is_fw_running(pdsc)) { - u8 fw_status = ioread8(&pdsc->info_regs->fw_status); - - dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n", - __func__, fw_status); + if (pdsc->info_regs) { + u8 fw_status = + ioread8(&pdsc->info_regs->fw_status); + + dev_dbg(pdsc->dev, "%s: post wait failed - fw not running %#02x:\n", + __func__, fw_status); + } else { + dev_dbg(pdsc->dev, "%s: post wait failed - BARs not setup\n", + __func__); + } err = -ENXIO; break; } @@ -285,6 +315,8 @@ err_out: queue_work(pdsc->wq, &pdsc->health_work); } + refcount_dec(&pdsc->adminq_refcnt); + return err; } EXPORT_SYMBOL_GPL(pdsc_adminq_post); diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c index 0d2091e9eb28..7658a7286767 100644 --- a/drivers/net/ethernet/amd/pds_core/core.c +++ b/drivers/net/ethernet/amd/pds_core/core.c @@ -125,7 +125,7 @@ static int pdsc_qcq_intr_alloc(struct pdsc *pdsc, struct pdsc_qcq *qcq) snprintf(name, sizeof(name), "%s-%d-%s", PDS_CORE_DRV_NAME, pdsc->pdev->bus->number, qcq->q.name); - index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, qcq); + index = pdsc_intr_alloc(pdsc, name, pdsc_adminq_isr, pdsc); if (index < 0) return index; qcq->intx = index; @@ -404,10 +404,7 @@ int pdsc_setup(struct pdsc *pdsc, bool init) int numdescs; int err; - if (init) - err = pdsc_dev_init(pdsc); - else - err = pdsc_dev_reinit(pdsc); + err = pdsc_dev_init(pdsc); if (err) return err; @@ -450,6 +447,7 @@ int pdsc_setup(struct pdsc *pdsc, bool init) pdsc_debugfs_add_viftype(pdsc); } + refcount_set(&pdsc->adminq_refcnt, 1); clear_bit(PDSC_S_FW_DEAD, &pdsc->state); return 0; @@ -464,6 +462,8 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) if (!pdsc->pdev->is_virtfn) pdsc_devcmd_reset(pdsc); + if (pdsc->adminqcq.work.func) + cancel_work_sync(&pdsc->adminqcq.work); pdsc_qcq_free(pdsc, &pdsc->notifyqcq); pdsc_qcq_free(pdsc, &pdsc->adminqcq); @@ -476,10 +476,9 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) for (i = 0; i < pdsc->nintrs; i++) pdsc_intr_free(pdsc, i); - if (removing) { - kfree(pdsc->intr_info); - pdsc->intr_info = NULL; - } + kfree(pdsc->intr_info); + pdsc->intr_info = NULL; + pdsc->nintrs = 0; } if (pdsc->kern_dbpage) { @@ -487,6 +486,7 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing) pdsc->kern_dbpage = NULL; } + pci_free_irq_vectors(pdsc->pdev); set_bit(PDSC_S_FW_DEAD, &pdsc->state); } @@ -512,6 +512,24 @@ void pdsc_stop(struct pdsc *pdsc) PDS_CORE_INTR_MASK_SET); } +static void pdsc_adminq_wait_and_dec_once_unused(struct pdsc *pdsc) +{ + /* The driver initializes the adminq_refcnt to 1 when the adminq is + * allocated and ready for use. Other users/requesters will increment + * the refcnt while in use. If the refcnt is down to 1 then the adminq + * is not in use and the refcnt can be cleared and adminq freed. Before + * calling this function the driver will set PDSC_S_FW_DEAD, which + * prevent subsequent attempts to use the adminq and increment the + * refcnt to fail. This guarantees that this function will eventually + * exit. + */ + while (!refcount_dec_if_one(&pdsc->adminq_refcnt)) { + dev_dbg_ratelimited(pdsc->dev, "%s: adminq in use\n", + __func__); + cpu_relax(); + } +} + void pdsc_fw_down(struct pdsc *pdsc) { union pds_core_notifyq_comp reset_event = { @@ -527,6 +545,8 @@ void pdsc_fw_down(struct pdsc *pdsc) if (pdsc->pdev->is_virtfn) return; + pdsc_adminq_wait_and_dec_once_unused(pdsc); + /* Notify clients of fw_down */ if (pdsc->fw_reporter) devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc); @@ -577,7 +597,13 @@ err_out: static void pdsc_check_pci_health(struct pdsc *pdsc) { - u8 fw_status = ioread8(&pdsc->info_regs->fw_status); + u8 fw_status; + + /* some sort of teardown already in progress */ + if (!pdsc->info_regs) + return; + + fw_status = ioread8(&pdsc->info_regs->fw_status); /* is PCI broken? */ if (fw_status != PDS_RC_BAD_PCI) diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h index e35d3e7006bf..110c4b826b22 100644 --- a/drivers/net/ethernet/amd/pds_core/core.h +++ b/drivers/net/ethernet/amd/pds_core/core.h @@ -184,6 +184,7 @@ struct pdsc { struct mutex devcmd_lock; /* lock for dev_cmd operations */ struct mutex config_lock; /* lock for configuration operations */ spinlock_t adminq_lock; /* lock for adminq operations */ + refcount_t adminq_refcnt; struct pds_core_dev_info_regs __iomem *info_regs; struct pds_core_dev_cmd_regs __iomem *cmd_regs; struct pds_core_intr __iomem *intr_ctrl; @@ -280,7 +281,6 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd, union pds_core_dev_comp *comp, int max_seconds); int pdsc_devcmd_init(struct pdsc *pdsc); int pdsc_devcmd_reset(struct pdsc *pdsc); -int pdsc_dev_reinit(struct pdsc *pdsc); int pdsc_dev_init(struct pdsc *pdsc); void pdsc_reset_prepare(struct pci_dev *pdev); diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c index 8ec392299b7d..4e8579ca1c8c 100644 --- a/drivers/net/ethernet/amd/pds_core/debugfs.c +++ b/drivers/net/ethernet/amd/pds_core/debugfs.c @@ -64,6 +64,10 @@ DEFINE_SHOW_ATTRIBUTE(identity); void pdsc_debugfs_add_ident(struct pdsc *pdsc) { + /* This file will already exist in the reset flow */ + if (debugfs_lookup("identity", pdsc->dentry)) + return; + debugfs_create_file("identity", 0400, pdsc->dentry, pdsc, &identity_fops); } diff --git a/drivers/net/ethernet/amd/pds_core/dev.c b/drivers/net/ethernet/amd/pds_core/dev.c index 31940b857e0e..e65a1632df50 100644 --- a/drivers/net/ethernet/amd/pds_core/dev.c +++ b/drivers/net/ethernet/amd/pds_core/dev.c @@ -57,6 +57,9 @@ int pdsc_err_to_errno(enum pds_core_status_code code) bool pdsc_is_fw_running(struct pdsc *pdsc) { + if (!pdsc->info_regs) + return false; + pdsc->fw_status = ioread8(&pdsc->info_regs->fw_status); pdsc->last_fw_time = jiffies; pdsc->last_hb = ioread32(&pdsc->info_regs->fw_heartbeat); @@ -182,13 +185,17 @@ int pdsc_devcmd_locked(struct pdsc *pdsc, union pds_core_dev_cmd *cmd, { int err; + if (!pdsc->cmd_regs) + return -ENXIO; + memcpy_toio(&pdsc->cmd_regs->cmd, cmd, sizeof(*cmd)); pdsc_devcmd_dbell(pdsc); err = pdsc_devcmd_wait(pdsc, cmd->opcode, max_seconds); - memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp)); if ((err == -ENXIO || err == -ETIMEDOUT) && pdsc->wq) queue_work(pdsc->wq, &pdsc->health_work); + else + memcpy_fromio(comp, &pdsc->cmd_regs->comp, sizeof(*comp)); return err; } @@ -309,13 +316,6 @@ static int pdsc_identify(struct pdsc *pdsc) return 0; } -int pdsc_dev_reinit(struct pdsc *pdsc) -{ - pdsc_init_devinfo(pdsc); - - return pdsc_identify(pdsc); -} - int pdsc_dev_init(struct pdsc *pdsc) { unsigned int nintrs; diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c index e9948ea5bbcd..54864f27c87a 100644 --- a/drivers/net/ethernet/amd/pds_core/devlink.c +++ b/drivers/net/ethernet/amd/pds_core/devlink.c @@ -111,7 +111,8 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req, mutex_lock(&pdsc->devcmd_lock); err = pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout * 2); - memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list)); + if (!err) + memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list)); mutex_unlock(&pdsc->devcmd_lock); if (err && err != -EIO) return err; diff --git a/drivers/net/ethernet/amd/pds_core/fw.c b/drivers/net/ethernet/amd/pds_core/fw.c index 90a811f3878a..fa626719e68d 100644 --- a/drivers/net/ethernet/amd/pds_core/fw.c +++ b/drivers/net/ethernet/amd/pds_core/fw.c @@ -107,6 +107,9 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw, dev_info(pdsc->dev, "Installing firmware\n"); + if (!pdsc->cmd_regs) + return -ENXIO; + dl = priv_to_devlink(pdsc); devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0); diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 3080898d7b95..cdbf053b5376 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -37,6 +37,11 @@ static void pdsc_unmap_bars(struct pdsc *pdsc) struct pdsc_dev_bar *bars = pdsc->bars; unsigned int i; + pdsc->info_regs = NULL; + pdsc->cmd_regs = NULL; + pdsc->intr_status = NULL; + pdsc->intr_ctrl = NULL; + for (i = 0; i < PDS_CORE_BARS_MAX; i++) { if (bars[i].vaddr) pci_iounmap(pdsc->pdev, bars[i].vaddr); @@ -293,7 +298,7 @@ err_out_stop: err_out_teardown: pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING); err_out_unmap_bars: - del_timer_sync(&pdsc->wdtimer); + timer_shutdown_sync(&pdsc->wdtimer); if (pdsc->wq) destroy_workqueue(pdsc->wq); mutex_destroy(&pdsc->config_lock); @@ -420,7 +425,7 @@ static void pdsc_remove(struct pci_dev *pdev) */ pdsc_sriov_configure(pdev, 0); - del_timer_sync(&pdsc->wdtimer); + timer_shutdown_sync(&pdsc->wdtimer); if (pdsc->wq) destroy_workqueue(pdsc->wq); @@ -433,7 +438,6 @@ static void pdsc_remove(struct pci_dev *pdev) mutex_destroy(&pdsc->config_lock); mutex_destroy(&pdsc->devcmd_lock); - pci_free_irq_vectors(pdev); pdsc_unmap_bars(pdsc); pci_release_regions(pdev); } @@ -445,13 +449,26 @@ static void pdsc_remove(struct pci_dev *pdev) devlink_free(dl); } +static void pdsc_stop_health_thread(struct pdsc *pdsc) +{ + timer_shutdown_sync(&pdsc->wdtimer); + if (pdsc->health_work.func) + cancel_work_sync(&pdsc->health_work); +} + +static void pdsc_restart_health_thread(struct pdsc *pdsc) +{ + timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0); + mod_timer(&pdsc->wdtimer, jiffies + 1); +} + void pdsc_reset_prepare(struct pci_dev *pdev) { struct pdsc *pdsc = pci_get_drvdata(pdev); + pdsc_stop_health_thread(pdsc); pdsc_fw_down(pdsc); - pci_free_irq_vectors(pdev); pdsc_unmap_bars(pdsc); pci_release_regions(pdev); pci_disable_device(pdev); @@ -486,6 +503,7 @@ void pdsc_reset_done(struct pci_dev *pdev) } pdsc_fw_up(pdsc); + pdsc_restart_health_thread(pdsc); } static const struct pci_error_handlers pdsc_err_handler = { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index adad188e38b8..cc07660330f5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -684,7 +684,7 @@ static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) timestamp.hwtstamp = ns_to_ktime(ns); skb_tstamp_tx(ptp->tx_skb, ×tamp); } else { - netdev_WARN_ONCE(bp->dev, + netdev_warn_once(bp->dev, "TS query for TX timer failed rc = %x\n", rc); } diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 7a8dc5386fff..76615d47e055 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -356,7 +356,7 @@ static enum pkt_hash_types gve_rss_type(__be16 pkt_flags) static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi, struct gve_rx_slot_page_info *page_info, - u16 packet_buffer_size, u16 len, + unsigned int truesize, u16 len, struct gve_rx_ctx *ctx) { u32 offset = page_info->page_offset + page_info->pad; @@ -389,10 +389,10 @@ static struct sk_buff *gve_rx_add_frags(struct napi_struct *napi, if (skb != ctx->skb_head) { ctx->skb_head->len += len; ctx->skb_head->data_len += len; - ctx->skb_head->truesize += packet_buffer_size; + ctx->skb_head->truesize += truesize; } skb_add_rx_frag(skb, num_frags, page_info->page, - offset, len, packet_buffer_size); + offset, len, truesize); return ctx->skb_head; } @@ -486,7 +486,7 @@ static struct sk_buff *gve_rx_copy_to_pool(struct gve_rx_ring *rx, memcpy(alloc_page_info.page_address, src, page_info->pad + len); skb = gve_rx_add_frags(napi, &alloc_page_info, - rx->packet_buffer_size, + PAGE_SIZE, len, ctx); u64_stats_update_begin(&rx->statss); diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index a187582d2299..ba9c19e6994c 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -360,23 +360,43 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); * As a result, a shift of INCVALUE_SHIFT_n is used to fit a value of * INCVALUE_n into the TIMINCA register allowing 32+8+(24-INCVALUE_SHIFT_n) * bits to count nanoseconds leaving the rest for fractional nonseconds. + * + * Any given INCVALUE also has an associated maximum adjustment value. This + * maximum adjustment value is the largest increase (or decrease) which can be + * safely applied without overflowing the INCVALUE. Since INCVALUE has + * a maximum range of 24 bits, its largest value is 0xFFFFFF. + * + * To understand where the maximum value comes from, consider the following + * equation: + * + * new_incval = base_incval + (base_incval * adjustment) / 1billion + * + * To avoid overflow that means: + * max_incval = base_incval + (base_incval * max_adj) / billion + * + * Re-arranging: + * max_adj = floor(((max_incval - base_incval) * 1billion) / 1billion) */ #define INCVALUE_96MHZ 125 #define INCVALUE_SHIFT_96MHZ 17 #define INCPERIOD_SHIFT_96MHZ 2 #define INCPERIOD_96MHZ (12 >> INCPERIOD_SHIFT_96MHZ) +#define MAX_PPB_96MHZ 23999900 /* 23,999,900 ppb */ #define INCVALUE_25MHZ 40 #define INCVALUE_SHIFT_25MHZ 18 #define INCPERIOD_25MHZ 1 +#define MAX_PPB_25MHZ 599999900 /* 599,999,900 ppb */ #define INCVALUE_24MHZ 125 #define INCVALUE_SHIFT_24MHZ 14 #define INCPERIOD_24MHZ 3 +#define MAX_PPB_24MHZ 999999999 /* 999,999,999 ppb */ #define INCVALUE_38400KHZ 26 #define INCVALUE_SHIFT_38400KHZ 19 #define INCPERIOD_38400KHZ 1 +#define MAX_PPB_38400KHZ 230769100 /* 230,769,100 ppb */ /* Another drawback of scaling the incvalue by a large factor is the * 64-bit SYSTIM register overflows more quickly. This is dealt with diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 02d871bc112a..bbcfd529399b 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -280,8 +280,17 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) switch (hw->mac.type) { case e1000_pch2lan: + adapter->ptp_clock_info.max_adj = MAX_PPB_96MHZ; + break; case e1000_pch_lpt: + if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) + adapter->ptp_clock_info.max_adj = MAX_PPB_96MHZ; + else + adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ; + break; case e1000_pch_spt: + adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ; + break; case e1000_pch_cnp: case e1000_pch_tgp: case e1000_pch_adp: @@ -289,15 +298,14 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) case e1000_pch_lnp: case e1000_pch_ptp: case e1000_pch_nvp: - if ((hw->mac.type < e1000_pch_lpt) || - (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI)) { - adapter->ptp_clock_info.max_adj = 24000000 - 1; - break; - } - fallthrough; + if (er32(TSYNCRXCTL) & E1000_TSYNCRXCTL_SYSCFI) + adapter->ptp_clock_info.max_adj = MAX_PPB_24MHZ; + else + adapter->ptp_clock_info.max_adj = MAX_PPB_38400KHZ; + break; case e1000_82574: case e1000_82583: - adapter->ptp_clock_info.max_adj = 600000000 - 1; + adapter->ptp_clock_info.max_adj = MAX_PPB_25MHZ; break; default: break; diff --git a/drivers/net/ethernet/intel/idpf/virtchnl2.h b/drivers/net/ethernet/intel/idpf/virtchnl2.h index 8dc837889723..4a3c4454d25a 100644 --- a/drivers/net/ethernet/intel/idpf/virtchnl2.h +++ b/drivers/net/ethernet/intel/idpf/virtchnl2.h @@ -978,7 +978,7 @@ struct virtchnl2_ptype { u8 proto_id_count; __le16 pad; __le16 proto_id[]; -}; +} __packed __aligned(2); VIRTCHNL2_CHECK_STRUCT_LEN(6, virtchnl2_ptype); /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 6208923e29a2..c1adc94a5a65 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -716,7 +716,8 @@ static s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, if ((command & IXGBE_SB_IOSF_CTRL_RESP_STAT_MASK) != 0) { error = FIELD_GET(IXGBE_SB_IOSF_CTRL_CMPL_ERR_MASK, command); hw_dbg(hw, "Failed to read, error %x\n", error); - return -EIO; + ret = -EIO; + goto out; } if (!ret) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 2928898c7f8d..7f786de61014 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -314,7 +314,6 @@ static int otx2_set_channels(struct net_device *dev, pfvf->hw.tx_queues = channel->tx_count; if (pfvf->xdp_prog) pfvf->hw.xdp_queues = channel->rx_count; - pfvf->hw.non_qos_queues = pfvf->hw.tx_queues + pfvf->hw.xdp_queues; if (if_up) err = dev->netdev_ops->ndo_open(dev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index a57455aebff6..e5fe67e73865 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1744,6 +1744,7 @@ int otx2_open(struct net_device *netdev) /* RQ and SQs are mapped to different CQs, * so find out max CQ IRQs (i.e CINTs) needed. */ + pf->hw.non_qos_queues = pf->hw.tx_queues + pf->hw.xdp_queues; pf->hw.cint_cnt = max3(pf->hw.rx_queues, pf->hw.tx_queues, pf->hw.tc_tx_queues); @@ -2643,8 +2644,6 @@ static int otx2_xdp_setup(struct otx2_nic *pf, struct bpf_prog *prog) xdp_features_clear_redirect_target(dev); } - pf->hw.non_qos_queues += pf->hw.xdp_queues; - if (if_up) otx2_open(pf->netdev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 4d519ea833b2..f828d32737af 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -1403,7 +1403,7 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, bool *need_xdp_flush) { - unsigned char *hard_start, *data; + unsigned char *hard_start; int qidx = cq->cq_idx; struct xdp_buff xdp; struct page *page; @@ -1417,9 +1417,8 @@ static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, xdp_init_buff(&xdp, pfvf->rbsize, &cq->xdp_rxq); - data = (unsigned char *)phys_to_virt(pa); - hard_start = page_address(page); - xdp_prepare_buff(&xdp, hard_start, data - hard_start, + hard_start = (unsigned char *)phys_to_virt(pa); + xdp_prepare_buff(&xdp, hard_start, OTX2_HEAD_ROOM, cqe->sg.seg_size, false); act = bpf_prog_run_xdp(prog, &xdp); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index a6e91573f8da..de123350bd46 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4761,7 +4761,10 @@ static int mtk_probe(struct platform_device *pdev) } if (MTK_HAS_CAPS(eth->soc->caps, MTK_36BIT_DMA)) { - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(36)); + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(36)); + if (!err) + err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (err) { dev_err(&pdev->dev, "Wrong DMA config\n"); return -EINVAL; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 92108d354051..2e83bbb9477e 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -168,9 +168,10 @@ static void lan966x_port_link_up(struct lan966x_port *port) lan966x_taprio_speed_set(port, config->speed); /* Also the GIGA_MODE_ENA(1) needs to be set regardless of the - * port speed for QSGMII ports. + * port speed for QSGMII or SGMII ports. */ - if (phy_interface_num_ports(config->portmode) == 4) + if (phy_interface_num_ports(config->portmode) == 4 || + config->portmode == PHY_INTERFACE_MODE_SGMII) mode = DEV_MAC_MODE_CFG_GIGA_MODE_ENA_SET(1); lan_wr(config->duplex | mode, diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index 2967bab72505..15180538b80a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1424,10 +1424,30 @@ static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_ mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask); return; + /* Both struct tcphdr and struct udphdr start with + * __be16 source; + * __be16 dest; + * so we can use the same code for both. + */ case FLOW_ACT_MANGLE_HDR_TYPE_TCP: case FLOW_ACT_MANGLE_HDR_TYPE_UDP: - mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val); - mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask); + if (mangle_action->mangle.offset == offsetof(struct tcphdr, source)) { + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val << 16); + /* The mask of mangle action is inverse mask, + * so clear the dest tp port with 0xFFFF to + * instead of rotate-left operation. + */ + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask << 16 | 0xFFFF); + } + if (mangle_action->mangle.offset == offsetof(struct tcphdr, dest)) { + mangle_action->mangle.offset = 0; + mangle_action->mangle.val = + (__force u32)cpu_to_be32(mangle_action->mangle.val); + mangle_action->mangle.mask = + (__force u32)cpu_to_be32(mangle_action->mangle.mask); + } return; default: @@ -1864,10 +1884,30 @@ int nfp_fl_ct_handle_post_ct(struct nfp_flower_priv *priv, { struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct nfp_fl_ct_flow_entry *ct_entry; + struct flow_action_entry *ct_goto; struct nfp_fl_ct_zone_entry *zt; + struct flow_action_entry *act; bool wildcarded = false; struct flow_match_ct ct; - struct flow_action_entry *ct_goto; + int i; + + flow_action_for_each(i, act, &rule->action) { + switch (act->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_REDIRECT_INGRESS: + case FLOW_ACTION_MIRRED: + case FLOW_ACTION_MIRRED_INGRESS: + if (act->dev->rtnl_link_ops && + !strcmp(act->dev->rtnl_link_ops->kind, "openvswitch")) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: out port is openvswitch internal port"); + return -EOPNOTSUPP; + } + break; + default: + break; + } + } flow_rule_match_ct(rule, &ct); if (!ct.mask->ct_zone) { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 8f730ada71f9..6b65420e11b5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -353,6 +353,10 @@ static int imx_dwmac_probe(struct platform_device *pdev) if (data->flags & STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY) plat_dat->flags |= STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY; + /* Default TX Q0 to use TSO and rest TXQ for TBS */ + for (int i = 1; i < plat_dat->tx_queues_to_use; i++) + plat_dat->tx_queues_cfg[i].tbs_en = 1; + plat_dat->host_dma_width = dwmac->ops->addr_width; plat_dat->init = imx_dwmac_init; plat_dat->exit = imx_dwmac_exit; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index b334eb16da23..25519952f754 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3932,6 +3932,9 @@ static int __stmmac_open(struct net_device *dev, priv->rx_copybreak = STMMAC_RX_COPYBREAK; buf_sz = dma_conf->dma_buf_sz; + for (int i = 0; i < MTL_MAX_TX_QUEUES; i++) + if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN) + dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs; memcpy(&priv->dma_conf, dma_conf, sizeof(*dma_conf)); stmmac_reset_queues_param(priv); diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 1dafa44155d0..a6fcbda64ecc 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -708,7 +708,10 @@ void netvsc_device_remove(struct hv_device *device) /* Disable NAPI and disassociate its context from the device. */ for (i = 0; i < net_device->num_chn; i++) { /* See also vmbus_reset_channel_cb(). */ - napi_disable(&net_device->chan_table[i].napi); + /* only disable enabled NAPI channel */ + if (i < ndev->real_num_rx_queues) + napi_disable(&net_device->chan_table[i].napi); + netif_napi_del(&net_device->chan_table[i].napi); } diff --git a/drivers/net/phy/mediatek-ge-soc.c b/drivers/net/phy/mediatek-ge-soc.c index 8a20d9889f10..0f3a1538a8b8 100644 --- a/drivers/net/phy/mediatek-ge-soc.c +++ b/drivers/net/phy/mediatek-ge-soc.c @@ -489,7 +489,7 @@ static int tx_r50_fill_result(struct phy_device *phydev, u16 tx_r50_cal_val, u16 reg, val; if (phydev->drv->phy_id == MTK_GPHY_ID_MT7988) - bias = -2; + bias = -1; val = clamp_val(bias + tx_r50_cal_val, 0, 63); @@ -705,6 +705,11 @@ restore: static void mt798x_phy_common_finetune(struct phy_device *phydev) { phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); + /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */ + __phy_write(phydev, 0x11, 0xc71); + __phy_write(phydev, 0x12, 0xc); + __phy_write(phydev, 0x10, 0x8fae); + /* EnabRandUpdTrig = 1 */ __phy_write(phydev, 0x11, 0x2f00); __phy_write(phydev, 0x12, 0xe); @@ -715,15 +720,56 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x83aa); - /* TrFreeze = 0 */ + /* FfeUpdGainForce = 1(Enable), FfeUpdGainForceVal = 4 */ + __phy_write(phydev, 0x11, 0x240); + __phy_write(phydev, 0x12, 0x0); + __phy_write(phydev, 0x10, 0x9680); + + /* TrFreeze = 0 (mt7988 default) */ __phy_write(phydev, 0x11, 0x0); __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x9686); + /* SSTrKp100 = 5 */ + /* SSTrKf100 = 6 */ + /* SSTrKp1000Mas = 5 */ + /* SSTrKf1000Mas = 6 */ /* SSTrKp1000Slv = 5 */ + /* SSTrKf1000Slv = 6 */ __phy_write(phydev, 0x11, 0xbaef); __phy_write(phydev, 0x12, 0x2e); __phy_write(phydev, 0x10, 0x968c); + phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); +} + +static void mt7981_phy_finetune(struct phy_device *phydev) +{ + u16 val[8] = { 0x01ce, 0x01c1, + 0x020f, 0x0202, + 0x03d0, 0x03c0, + 0x0013, 0x0005 }; + int i, k; + + /* 100M eye finetune: + * Keep middle level of TX MLT3 shapper as default. + * Only change TX MLT3 overshoot level here. + */ + for (k = 0, i = 1; i < 12; i++) { + if (i % 3 == 0) + continue; + phy_write_mmd(phydev, MDIO_MMD_VEND1, i, val[k++]); + } + + phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); + /* ResetSyncOffset = 6 */ + __phy_write(phydev, 0x11, 0x600); + __phy_write(phydev, 0x12, 0x0); + __phy_write(phydev, 0x10, 0x8fc0); + + /* VgaDecRate = 1 */ + __phy_write(phydev, 0x11, 0x4c2a); + __phy_write(phydev, 0x12, 0x3e); + __phy_write(phydev, 0x10, 0x8fa4); /* MrvlTrFix100Kp = 3, MrvlTrFix100Kf = 2, * MrvlTrFix1000Kp = 3, MrvlTrFix1000Kf = 2 @@ -738,7 +784,7 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) __phy_write(phydev, 0x10, 0x8ec0); phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); - /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9*/ + /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 9 */ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG234, MTK_PHY_TR_OPEN_LOOP_EN_MASK | MTK_PHY_LPF_X_AVERAGE_MASK, BIT(0) | FIELD_PREP(MTK_PHY_LPF_X_AVERAGE_MASK, 0x9)); @@ -771,48 +817,6 @@ static void mt798x_phy_common_finetune(struct phy_device *phydev) phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_LDO_OUTPUT_V, 0x2222); } -static void mt7981_phy_finetune(struct phy_device *phydev) -{ - u16 val[8] = { 0x01ce, 0x01c1, - 0x020f, 0x0202, - 0x03d0, 0x03c0, - 0x0013, 0x0005 }; - int i, k; - - /* 100M eye finetune: - * Keep middle level of TX MLT3 shapper as default. - * Only change TX MLT3 overshoot level here. - */ - for (k = 0, i = 1; i < 12; i++) { - if (i % 3 == 0) - continue; - phy_write_mmd(phydev, MDIO_MMD_VEND1, i, val[k++]); - } - - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - /* SlvDSPreadyTime = 24, MasDSPreadyTime = 24 */ - __phy_write(phydev, 0x11, 0xc71); - __phy_write(phydev, 0x12, 0xc); - __phy_write(phydev, 0x10, 0x8fae); - - /* ResetSyncOffset = 6 */ - __phy_write(phydev, 0x11, 0x600); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x8fc0); - - /* VgaDecRate = 1 */ - __phy_write(phydev, 0x11, 0x4c2a); - __phy_write(phydev, 0x12, 0x3e); - __phy_write(phydev, 0x10, 0x8fa4); - - /* FfeUpdGainForce = 4 */ - __phy_write(phydev, 0x11, 0x240); - __phy_write(phydev, 0x12, 0x0); - __phy_write(phydev, 0x10, 0x9680); - - phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); -} - static void mt7988_phy_finetune(struct phy_device *phydev) { u16 val[12] = { 0x0187, 0x01cd, 0x01c8, 0x0182, @@ -827,17 +831,7 @@ static void mt7988_phy_finetune(struct phy_device *phydev) /* TCT finetune */ phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_TX_FILTER, 0x5); - /* Disable TX power saving */ - phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7, - MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3 << 8); - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_52B5); - - /* SlvDSPreadyTime = 24, MasDSPreadyTime = 12 */ - __phy_write(phydev, 0x11, 0x671); - __phy_write(phydev, 0x12, 0xc); - __phy_write(phydev, 0x10, 0x8fae); - /* ResetSyncOffset = 5 */ __phy_write(phydev, 0x11, 0x500); __phy_write(phydev, 0x12, 0x0); @@ -845,13 +839,27 @@ static void mt7988_phy_finetune(struct phy_device *phydev) /* VgaDecRate is 1 at default on mt7988 */ - phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); + /* MrvlTrFix100Kp = 6, MrvlTrFix100Kf = 7, + * MrvlTrFix1000Kp = 6, MrvlTrFix1000Kf = 7 + */ + __phy_write(phydev, 0x11, 0xb90a); + __phy_write(phydev, 0x12, 0x6f); + __phy_write(phydev, 0x10, 0x8f82); + + /* RemAckCntLimitCtrl = 1 */ + __phy_write(phydev, 0x11, 0xfbba); + __phy_write(phydev, 0x12, 0xc3); + __phy_write(phydev, 0x10, 0x87f8); - phy_select_page(phydev, MTK_PHY_PAGE_EXTENDED_2A30); - /* TxClkOffset = 2 */ - __phy_modify(phydev, MTK_PHY_ANARG_RG, MTK_PHY_TCLKOFFSET_MASK, - FIELD_PREP(MTK_PHY_TCLKOFFSET_MASK, 0x2)); phy_restore_page(phydev, MTK_PHY_PAGE_STANDARD, 0); + + /* TR_OPEN_LOOP_EN = 1, lpf_x_average = 10 */ + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG234, + MTK_PHY_TR_OPEN_LOOP_EN_MASK | MTK_PHY_LPF_X_AVERAGE_MASK, + BIT(0) | FIELD_PREP(MTK_PHY_LPF_X_AVERAGE_MASK, 0xa)); + + /* rg_tr_lpf_cnt_val = 1023 */ + phy_write_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_LPF_CNT_VAL, 0x3ff); } static void mt798x_phy_eee(struct phy_device *phydev) @@ -884,11 +892,11 @@ static void mt798x_phy_eee(struct phy_device *phydev) MTK_PHY_LPI_SLV_SEND_TX_EN, FIELD_PREP(MTK_PHY_LPI_SLV_SEND_TX_TIMER_MASK, 0x120)); - phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG239, - MTK_PHY_LPI_SEND_LOC_TIMER_MASK | - MTK_PHY_LPI_TXPCS_LOC_RCV, - FIELD_PREP(MTK_PHY_LPI_SEND_LOC_TIMER_MASK, 0x117)); + /* Keep MTK_PHY_LPI_SEND_LOC_TIMER as 375 */ + phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG239, + MTK_PHY_LPI_TXPCS_LOC_RCV); + /* This also fixes some IoT issues, such as CH340 */ phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RG_DEV1E_REG2C7, MTK_PHY_MAX_GAIN_MASK | MTK_PHY_MIN_GAIN_MASK, FIELD_PREP(MTK_PHY_MAX_GAIN_MASK, 0x8) | @@ -922,7 +930,7 @@ static void mt798x_phy_eee(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x9690); - /* REG_EEE_st2TrKf1000 = 3 */ + /* REG_EEE_st2TrKf1000 = 2 */ __phy_write(phydev, 0x11, 0x114f); __phy_write(phydev, 0x12, 0x2); __phy_write(phydev, 0x10, 0x969a); @@ -947,7 +955,7 @@ static void mt798x_phy_eee(struct phy_device *phydev) __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x96b8); - /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 1 */ + /* REGEEE_wake_slv_tr_wait_dfesigdet_en = 0 */ __phy_write(phydev, 0x11, 0x1463); __phy_write(phydev, 0x12, 0x0); __phy_write(phydev, 0x10, 0x96ca); @@ -1459,6 +1467,13 @@ static int mt7988_phy_probe(struct phy_device *phydev) if (err) return err; + /* Disable TX power saving at probing to: + * 1. Meet common mode compliance test criteria + * 2. Make sure that TX-VCM calibration works fine + */ + phy_modify_mmd(phydev, MDIO_MMD_VEND1, MTK_PHY_RXADC_CTRL_RG7, + MTK_PHY_DA_AD_BUF_BIAS_LP_MASK, 0x3 << 8); + return mt798x_phy_calibration(phydev); } diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index d7503aef599f..fab361a250d6 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -104,13 +104,12 @@ bool provides_xdp_headroom = true; module_param(provides_xdp_headroom, bool, 0644); static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status); + s8 status); static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st); -static void push_tx_responses(struct xenvif_queue *queue); + s8 status); static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); @@ -208,13 +207,9 @@ static void xenvif_tx_err(struct xenvif_queue *queue, unsigned int extra_count, RING_IDX end) { RING_IDX cons = queue->tx.req_cons; - unsigned long flags; do { - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, extra_count, XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); if (cons == end) break; RING_COPY_REQUEST(&queue->tx, cons++, txp); @@ -465,12 +460,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS; nr_slots--) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, flags); ++txp; continue; } @@ -496,14 +486,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue, for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; ++txp) { if (unlikely(!txp->size)) { - unsigned long flags; - - spin_lock_irqsave(&queue->response_lock, flags); make_tx_response(queue, txp, 0, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); - spin_unlock_irqrestore(&queue->response_lock, - flags); continue; } @@ -995,7 +979,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, (ret == 0) ? XEN_NETIF_RSP_OKAY : XEN_NETIF_RSP_ERROR); - push_tx_responses(queue); continue; } @@ -1007,7 +990,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, make_tx_response(queue, &txreq, extra_count, XEN_NETIF_RSP_OKAY); - push_tx_responses(queue); continue; } @@ -1433,8 +1415,35 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) return work_done; } +static void _make_tx_response(struct xenvif_queue *queue, + const struct xen_netif_tx_request *txp, + unsigned int extra_count, + s8 status) +{ + RING_IDX i = queue->tx.rsp_prod_pvt; + struct xen_netif_tx_response *resp; + + resp = RING_GET_RESPONSE(&queue->tx, i); + resp->id = txp->id; + resp->status = status; + + while (extra_count-- != 0) + RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; + + queue->tx.rsp_prod_pvt = ++i; +} + +static void push_tx_responses(struct xenvif_queue *queue) +{ + int notify; + + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); + if (notify) + notify_remote_via_irq(queue->tx_irq); +} + static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, - u8 status) + s8 status) { struct pending_tx_info *pending_tx_info; pending_ring_idx_t index; @@ -1444,8 +1453,8 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, spin_lock_irqsave(&queue->response_lock, flags); - make_tx_response(queue, &pending_tx_info->req, - pending_tx_info->extra_count, status); + _make_tx_response(queue, &pending_tx_info->req, + pending_tx_info->extra_count, status); /* Release the pending index before pusing the Tx response so * its available before a new Tx request is pushed by the @@ -1459,32 +1468,19 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx, spin_unlock_irqrestore(&queue->response_lock, flags); } - static void make_tx_response(struct xenvif_queue *queue, - struct xen_netif_tx_request *txp, + const struct xen_netif_tx_request *txp, unsigned int extra_count, - s8 st) + s8 status) { - RING_IDX i = queue->tx.rsp_prod_pvt; - struct xen_netif_tx_response *resp; - - resp = RING_GET_RESPONSE(&queue->tx, i); - resp->id = txp->id; - resp->status = st; - - while (extra_count-- != 0) - RING_GET_RESPONSE(&queue->tx, ++i)->status = XEN_NETIF_RSP_NULL; + unsigned long flags; - queue->tx.rsp_prod_pvt = ++i; -} + spin_lock_irqsave(&queue->response_lock, flags); -static void push_tx_responses(struct xenvif_queue *queue) -{ - int notify; + _make_tx_response(queue, txp, extra_count, status); + push_tx_responses(queue); - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify); - if (notify) - notify_remote_via_irq(queue->tx_irq); + spin_unlock_irqrestore(&queue->response_lock, flags); } static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) diff --git a/drivers/regulator/max5970-regulator.c b/drivers/regulator/max5970-regulator.c index bc88a40a88d4..830a1c4cd705 100644 --- a/drivers/regulator/max5970-regulator.c +++ b/drivers/regulator/max5970-regulator.c @@ -392,7 +392,7 @@ static int max597x_regmap_read_clear(struct regmap *map, unsigned int reg, return ret; if (*val) - return regmap_write(map, reg, *val); + return regmap_write(map, reg, 0); return 0; } diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c index 698c420e0869..60cfcd741c2a 100644 --- a/drivers/regulator/pwm-regulator.c +++ b/drivers/regulator/pwm-regulator.c @@ -157,7 +157,17 @@ static int pwm_regulator_get_voltage(struct regulator_dev *rdev) pwm_get_state(drvdata->pwm, &pstate); + if (!pstate.enabled) { + if (pstate.polarity == PWM_POLARITY_INVERSED) + pstate.duty_cycle = pstate.period; + else + pstate.duty_cycle = 0; + } + voltage = pwm_get_relative_duty_cycle(&pstate, duty_unit); + if (voltage < min(max_uV_duty, min_uV_duty) || + voltage > max(max_uV_duty, min_uV_duty)) + return -ENOTRECOVERABLE; /* * The dutycycle for min_uV might be greater than the one for max_uV. @@ -313,6 +323,32 @@ static int pwm_regulator_init_continuous(struct platform_device *pdev, return 0; } +static int pwm_regulator_init_boot_on(struct platform_device *pdev, + struct pwm_regulator_data *drvdata, + const struct regulator_init_data *init_data) +{ + struct pwm_state pstate; + + if (!init_data->constraints.boot_on || drvdata->enb_gpio) + return 0; + + pwm_get_state(drvdata->pwm, &pstate); + if (pstate.enabled) + return 0; + + /* + * Update the duty cycle so the output does not change + * when the regulator core enables the regulator (and + * thus the PWM channel). + */ + if (pstate.polarity == PWM_POLARITY_INVERSED) + pstate.duty_cycle = pstate.period; + else + pstate.duty_cycle = 0; + + return pwm_apply_might_sleep(drvdata->pwm, &pstate); +} + static int pwm_regulator_probe(struct platform_device *pdev) { const struct regulator_init_data *init_data; @@ -372,6 +408,13 @@ static int pwm_regulator_probe(struct platform_device *pdev) if (ret) return ret; + ret = pwm_regulator_init_boot_on(pdev, drvdata, init_data); + if (ret) { + dev_err(&pdev->dev, "Failed to apply boot_on settings: %d\n", + ret); + return ret; + } + regulator = devm_regulator_register(&pdev->dev, &drvdata->desc, &config); if (IS_ERR(regulator)) { diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index f48214e2c3b4..04133510e5af 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -726,9 +726,25 @@ static int ti_abb_probe(struct platform_device *pdev) return PTR_ERR(abb->setup_reg); } - abb->int_base = devm_platform_ioremap_resource_byname(pdev, "int-address"); - if (IS_ERR(abb->int_base)) - return PTR_ERR(abb->int_base); + pname = "int-address"; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, pname); + if (!res) { + dev_err(dev, "Missing '%s' IO resource\n", pname); + return -ENODEV; + } + /* + * The MPU interrupt status register (PRM_IRQSTATUS_MPU) is + * shared between regulator-abb-{ivahd,dspeve,gpu} driver + * instances. Therefore use devm_ioremap() rather than + * devm_platform_ioremap_resource_byname() to avoid busy + * resource region conflicts. + */ + abb->int_base = devm_ioremap(dev, res->start, + resource_size(res)); + if (!abb->int_base) { + dev_err(dev, "Unable to map '%s'\n", pname); + return -ENOMEM; + } /* Map Optional resources */ pname = "efuse-address"; diff --git a/drivers/scsi/initio.c b/drivers/scsi/initio.c index 2a50fda3a628..625fd547ee60 100644 --- a/drivers/scsi/initio.c +++ b/drivers/scsi/initio.c @@ -371,7 +371,6 @@ static u16 initio_se2_rd(unsigned long base, u8 addr) */ static void initio_se2_wr(unsigned long base, u8 addr, u16 val) { - u8 rb; u8 instr; int i; @@ -400,7 +399,7 @@ static void initio_se2_wr(unsigned long base, u8 addr, u16 val) udelay(30); outb(SE2CS, base + TUL_NVRAM); /* -CLK */ udelay(30); - if ((rb = inb(base + TUL_NVRAM)) & SE2DI) + if (inb(base + TUL_NVRAM) & SE2DI) break; /* write complete */ } outb(0, base + TUL_NVRAM); /* -CS */ diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 71f711cb0628..355a0bc0828e 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -3387,7 +3387,7 @@ static enum sci_status isci_io_request_build(struct isci_host *ihost, return SCI_FAILURE; } - return SCI_SUCCESS; + return status; } static struct isci_request *isci_request_from_tag(struct isci_host *ihost, u16 tag) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 79da4b1c1df0..4f455884fdc4 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -61,11 +61,11 @@ static int scsi_eh_try_stu(struct scsi_cmnd *scmd); static enum scsi_disposition scsi_try_to_abort_cmd(const struct scsi_host_template *, struct scsi_cmnd *); -void scsi_eh_wakeup(struct Scsi_Host *shost) +void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy) { lockdep_assert_held(shost->host_lock); - if (scsi_host_busy(shost) == shost->host_failed) { + if (busy == shost->host_failed) { trace_scsi_eh_wakeup(shost); wake_up_process(shost->ehandler); SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost, @@ -88,7 +88,7 @@ void scsi_schedule_eh(struct Scsi_Host *shost) if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 || scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) { shost->host_eh_scheduled++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); } spin_unlock_irqrestore(shost->host_lock, flags); @@ -286,7 +286,7 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head) spin_lock_irqsave(shost->host_lock, flags); shost->host_failed++; - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); spin_unlock_irqrestore(shost->host_lock, flags); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index cf3864f72093..1fb80eae9a63 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -280,7 +280,7 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd) if (unlikely(scsi_host_in_recovery(shost))) { spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) - scsi_eh_wakeup(shost); + scsi_eh_wakeup(shost, scsi_host_busy(shost)); spin_unlock_irqrestore(shost->host_lock, flags); } rcu_read_unlock(); diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 3f0dfb97db6b..1fbfe1b52c9f 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -92,7 +92,7 @@ extern void scmd_eh_abort_handler(struct work_struct *work); extern enum blk_eh_timer_return scsi_timeout(struct request *req); extern int scsi_error_handler(void *host); extern enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *cmd); -extern void scsi_eh_wakeup(struct Scsi_Host *shost); +extern void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy); extern void scsi_eh_scmd_add(struct scsi_cmnd *); void scsi_eh_ready_devs(struct Scsi_Host *shost, struct list_head *work_q, diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index a95936b18f69..7ceb982040a5 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -330,6 +330,7 @@ enum storvsc_request_type { */ static int storvsc_ringbuffer_size = (128 * 1024); +static int aligned_ringbuffer_size; static u32 max_outstanding_req_per_channel; static int storvsc_change_queue_depth(struct scsi_device *sdev, int queue_depth); @@ -687,8 +688,8 @@ static void handle_sc_creation(struct vmbus_channel *new_sc) new_sc->next_request_id_callback = storvsc_next_request_id; ret = vmbus_open(new_sc, - storvsc_ringbuffer_size, - storvsc_ringbuffer_size, + aligned_ringbuffer_size, + aligned_ringbuffer_size, (void *)&props, sizeof(struct vmstorage_channel_properties), storvsc_on_channel_callback, new_sc); @@ -1973,7 +1974,7 @@ static int storvsc_probe(struct hv_device *device, dma_set_min_align_mask(&device->device, HV_HYP_PAGE_SIZE - 1); stor_device->port_number = host->host_no; - ret = storvsc_connect_to_vsp(device, storvsc_ringbuffer_size, is_fc); + ret = storvsc_connect_to_vsp(device, aligned_ringbuffer_size, is_fc); if (ret) goto err_out1; @@ -2164,7 +2165,7 @@ static int storvsc_resume(struct hv_device *hv_dev) { int ret; - ret = storvsc_connect_to_vsp(hv_dev, storvsc_ringbuffer_size, + ret = storvsc_connect_to_vsp(hv_dev, aligned_ringbuffer_size, hv_dev_is_fc(hv_dev)); return ret; } @@ -2198,8 +2199,9 @@ static int __init storvsc_drv_init(void) * the ring buffer indices) by the max request size (which is * vmbus_channel_packet_multipage_buffer + struct vstor_packet + u64) */ + aligned_ringbuffer_size = VMBUS_RING_SIZE(storvsc_ringbuffer_size); max_outstanding_req_per_channel = - ((storvsc_ringbuffer_size - PAGE_SIZE) / + ((aligned_ringbuffer_size - PAGE_SIZE) / ALIGN(MAX_MULTIPAGE_BUFFER_PACKET + sizeof(struct vstor_packet) + sizeof(u64), sizeof(u64))); diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 4cf20be668a6..617eb892f4ad 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -188,8 +188,6 @@ static void virtscsi_vq_done(struct virtio_scsi *vscsi, while ((buf = virtqueue_get_buf(vq, &len)) != NULL) fn(vscsi, buf); - if (unlikely(virtqueue_is_broken(vq))) - break; } while (!virtqueue_enable_cb(vq)); spin_unlock_irqrestore(&virtscsi_vq->vq_lock, flags); } diff --git a/drivers/soc/apple/mailbox.c b/drivers/soc/apple/mailbox.c index 780199bf351e..49a0955e82d6 100644 --- a/drivers/soc/apple/mailbox.c +++ b/drivers/soc/apple/mailbox.c @@ -296,14 +296,14 @@ struct apple_mbox *apple_mbox_get(struct device *dev, int index) of_node_put(args.np); if (!pdev) - return ERR_PTR(EPROBE_DEFER); + return ERR_PTR(-EPROBE_DEFER); mbox = platform_get_drvdata(pdev); if (!mbox) - return ERR_PTR(EPROBE_DEFER); + return ERR_PTR(-EPROBE_DEFER); if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_CONSUMER)) - return ERR_PTR(ENODEV); + return ERR_PTR(-ENODEV); return mbox; } diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index cfc3b1ddbd22..6f12e4fb2e2e 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -136,14 +136,14 @@ struct sh_msiof_spi_priv { /* SIFCTR */ #define SIFCTR_TFWM_MASK GENMASK(31, 29) /* Transmit FIFO Watermark */ -#define SIFCTR_TFWM_64 (0 << 29) /* Transfer Request when 64 empty stages */ -#define SIFCTR_TFWM_32 (1 << 29) /* Transfer Request when 32 empty stages */ -#define SIFCTR_TFWM_24 (2 << 29) /* Transfer Request when 24 empty stages */ -#define SIFCTR_TFWM_16 (3 << 29) /* Transfer Request when 16 empty stages */ -#define SIFCTR_TFWM_12 (4 << 29) /* Transfer Request when 12 empty stages */ -#define SIFCTR_TFWM_8 (5 << 29) /* Transfer Request when 8 empty stages */ -#define SIFCTR_TFWM_4 (6 << 29) /* Transfer Request when 4 empty stages */ -#define SIFCTR_TFWM_1 (7 << 29) /* Transfer Request when 1 empty stage */ +#define SIFCTR_TFWM_64 (0UL << 29) /* Transfer Request when 64 empty stages */ +#define SIFCTR_TFWM_32 (1UL << 29) /* Transfer Request when 32 empty stages */ +#define SIFCTR_TFWM_24 (2UL << 29) /* Transfer Request when 24 empty stages */ +#define SIFCTR_TFWM_16 (3UL << 29) /* Transfer Request when 16 empty stages */ +#define SIFCTR_TFWM_12 (4UL << 29) /* Transfer Request when 12 empty stages */ +#define SIFCTR_TFWM_8 (5UL << 29) /* Transfer Request when 8 empty stages */ +#define SIFCTR_TFWM_4 (6UL << 29) /* Transfer Request when 4 empty stages */ +#define SIFCTR_TFWM_1 (7UL << 29) /* Transfer Request when 1 empty stage */ #define SIFCTR_TFUA_MASK GENMASK(26, 20) /* Transmit FIFO Usable Area */ #define SIFCTR_TFUA_SHIFT 20 #define SIFCTR_TFUA(i) ((i) << SIFCTR_TFUA_SHIFT) diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 279933e007d2..7cc5841577b2 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -11,13 +11,12 @@ struct z_erofs_decompress_req { struct super_block *sb; struct page **in, **out; - unsigned short pageofs_in, pageofs_out; unsigned int inputsize, outputsize; - /* indicate the algorithm will be used for decompression */ - unsigned int alg; + unsigned int alg; /* the algorithm for decompression */ bool inplace_io, partial_decoding, fillgaps; + gfp_t gfp; /* allocation flags for extra temporary buffers */ }; struct z_erofs_decompressor { diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 072ef6a66823..d4cee95af14c 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -111,8 +111,9 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, victim = availables[--top]; get_page(victim); } else { - victim = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + victim = erofs_allocpage(pagepool, rq->gfp); + if (!victim) + return -ENOMEM; set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE); } rq->out[i] = victim; diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c index 4a64a9c91dd3..b98872058abe 100644 --- a/fs/erofs/decompressor_deflate.c +++ b/fs/erofs/decompressor_deflate.c @@ -95,7 +95,7 @@ int z_erofs_load_deflate_config(struct super_block *sb, } int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, - struct page **pagepool) + struct page **pgpl) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -158,8 +158,12 @@ again: strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs); outsz -= strm->z.avail_out; if (!rq->out[no]) { - rq->out[no] = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + rq->out[no] = erofs_allocpage(pgpl, rq->gfp); + if (!rq->out[no]) { + kout = NULL; + err = -ENOMEM; + break; + } set_page_private(rq->out[no], Z_EROFS_SHORTLIVED_PAGE); } @@ -211,8 +215,11 @@ again: DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb), rq->in[j])); - tmppage = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + tmppage = erofs_allocpage(pgpl, rq->gfp); + if (!tmppage) { + err = -ENOMEM; + goto failed; + } set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); copy_highpage(tmppage, rq->in[j]); rq->in[j] = tmppage; @@ -230,7 +237,7 @@ again: break; } } - +failed: if (zlib_inflateEnd(&strm->z) != Z_OK && !err) err = -EIO; if (kout) diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 2dd14f99c1dc..6ca357d83cfa 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -148,7 +148,7 @@ again: } int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, - struct page **pagepool) + struct page **pgpl) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -215,8 +215,11 @@ again: PAGE_SIZE - pageofs); outlen -= strm->buf.out_size; if (!rq->out[no] && rq->fillgaps) { /* deduped */ - rq->out[no] = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + rq->out[no] = erofs_allocpage(pgpl, rq->gfp); + if (!rq->out[no]) { + err = -ENOMEM; + break; + } set_page_private(rq->out[no], Z_EROFS_SHORTLIVED_PAGE); } @@ -258,8 +261,11 @@ again: DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb), rq->in[j])); - tmppage = erofs_allocpage(pagepool, - GFP_KERNEL | __GFP_NOFAIL); + tmppage = erofs_allocpage(pgpl, rq->gfp); + if (!tmppage) { + err = -ENOMEM; + goto failed; + } set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); copy_highpage(tmppage, rq->in[j]); rq->in[j] = tmppage; @@ -277,6 +283,7 @@ again: break; } } +failed: if (no < nrpages_out && strm->buf.out) kunmap(rq->out[no]); if (ni < nrpages_in) diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index bc12030393b2..5ff90026fd43 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -459,7 +459,7 @@ static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &erofs_fscache_meta_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); inode->i_blkbits = EROFS_SB(sb)->blkszbits; inode->i_private = ctx; diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 3d616dea55dc..36e638e8b53a 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -60,7 +60,7 @@ static void *erofs_read_inode(struct erofs_buf *buf, } else { const unsigned int gotten = sb->s_blocksize - *ofs; - copied = kmalloc(vi->inode_isize, GFP_NOFS); + copied = kmalloc(vi->inode_isize, GFP_KERNEL); if (!copied) { err = -ENOMEM; goto err_out; diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c index 5dea308764b4..e146d09151af 100644 --- a/fs/erofs/utils.c +++ b/fs/erofs/utils.c @@ -81,7 +81,7 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb, repeat: xa_lock(&sbi->managed_pslots); pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index, - NULL, grp, GFP_NOFS); + NULL, grp, GFP_KERNEL); if (pre) { if (xa_is_err(pre)) { pre = ERR_PTR(xa_err(pre)); diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 692c0c39be63..ff0aa72b0db3 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -82,6 +82,9 @@ struct z_erofs_pcluster { /* L: indicate several pageofs_outs or not */ bool multibases; + /* L: whether extra buffer allocations are best-effort */ + bool besteffort; + /* A: compressed bvecs (can be cached or inplaced pages) */ struct z_erofs_bvec compressed_bvecs[]; }; @@ -230,7 +233,7 @@ static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter, struct page *nextpage = *candidate_bvpage; if (!nextpage) { - nextpage = erofs_allocpage(pagepool, GFP_NOFS); + nextpage = erofs_allocpage(pagepool, GFP_KERNEL); if (!nextpage) return -ENOMEM; set_page_private(nextpage, Z_EROFS_SHORTLIVED_PAGE); @@ -302,7 +305,7 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size) if (nrpages > pcs->maxpages) continue; - pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS); + pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL); if (!pcl) return ERR_PTR(-ENOMEM); pcl->pclustersize = size; @@ -563,21 +566,19 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; unsigned int i; - if (i_blocksize(fe->inode) != PAGE_SIZE) - return; - if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED) + if (i_blocksize(fe->inode) != PAGE_SIZE || + fe->mode < Z_EROFS_PCLUSTER_FOLLOWED) return; for (i = 0; i < pclusterpages; ++i) { struct page *page, *newpage; void *t; /* mark pages just found for debugging */ - /* the compressed page was loaded before */ + /* Inaccurate check w/o locking to avoid unneeded lookups */ if (READ_ONCE(pcl->compressed_bvecs[i].page)) continue; page = find_get_page(mc, pcl->obj.index + i); - if (page) { t = (void *)((unsigned long)page | 1); newpage = NULL; @@ -597,9 +598,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); t = (void *)((unsigned long)newpage | 1); } - - if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL, t)) + spin_lock(&pcl->obj.lockref.lock); + if (!pcl->compressed_bvecs[i].page) { + pcl->compressed_bvecs[i].page = t; + spin_unlock(&pcl->obj.lockref.lock); continue; + } + spin_unlock(&pcl->obj.lockref.lock); if (page) put_page(page); @@ -694,7 +699,7 @@ static void z_erofs_cache_invalidate_folio(struct folio *folio, DBG_BUGON(stop > folio_size(folio) || stop < length); if (offset == 0 && stop == folio_size(folio)) - while (!z_erofs_cache_release_folio(folio, GFP_NOFS)) + while (!z_erofs_cache_release_folio(folio, 0)) cond_resched(); } @@ -713,36 +718,30 @@ int erofs_init_managed_cache(struct super_block *sb) set_nlink(inode, 1); inode->i_size = OFFSET_MAX; inode->i_mapping->a_ops = &z_erofs_cache_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); + mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); EROFS_SB(sb)->managed_cache = inode; return 0; } -static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe, - struct z_erofs_bvec *bvec) -{ - struct z_erofs_pcluster *const pcl = fe->pcl; - - while (fe->icur > 0) { - if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page, - NULL, bvec->page)) { - pcl->compressed_bvecs[fe->icur] = *bvec; - return true; - } - } - return false; -} - /* callers must be with pcluster lock held */ static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, struct z_erofs_bvec *bvec, bool exclusive) { + struct z_erofs_pcluster *pcl = fe->pcl; int ret; if (exclusive) { /* give priority for inplaceio to use file pages first */ - if (z_erofs_try_inplace_io(fe, bvec)) + spin_lock(&pcl->obj.lockref.lock); + while (fe->icur > 0) { + if (pcl->compressed_bvecs[--fe->icur].page) + continue; + pcl->compressed_bvecs[fe->icur] = *bvec; + spin_unlock(&pcl->obj.lockref.lock); return 0; + } + spin_unlock(&pcl->obj.lockref.lock); + /* otherwise, check if it can be used as a bvpage */ if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED && !fe->candidate_bvpage) @@ -964,7 +963,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page, } static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, - struct page *page) + struct page *page, bool ra) { struct inode *const inode = fe->inode; struct erofs_map_blocks *const map = &fe->map; @@ -1014,6 +1013,7 @@ repeat: err = z_erofs_pcluster_begin(fe); if (err) goto out; + fe->pcl->besteffort |= !ra; } /* @@ -1280,6 +1280,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, .inplace_io = overlapped, .partial_decoding = pcl->partial, .fillgaps = pcl->multibases, + .gfp = pcl->besteffort ? + GFP_KERNEL | __GFP_NOFAIL : + GFP_NOWAIT | __GFP_NORETRY }, be->pagepool); /* must handle all compressed pages before actual file pages */ @@ -1322,6 +1325,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, pcl->length = 0; pcl->partial = true; pcl->multibases = false; + pcl->besteffort = false; pcl->bvset.nextpage = NULL; pcl->vcnt = 0; @@ -1423,23 +1427,26 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, { gfp_t gfp = mapping_gfp_mask(mc); bool tocache = false; - struct z_erofs_bvec *zbv = pcl->compressed_bvecs + nr; + struct z_erofs_bvec zbv; struct address_space *mapping; - struct page *page, *oldpage; + struct page *page; int justfound, bs = i_blocksize(f->inode); /* Except for inplace pages, the entire page can be used for I/Os */ bvec->bv_offset = 0; bvec->bv_len = PAGE_SIZE; repeat: - oldpage = READ_ONCE(zbv->page); - if (!oldpage) + spin_lock(&pcl->obj.lockref.lock); + zbv = pcl->compressed_bvecs[nr]; + page = zbv.page; + justfound = (unsigned long)page & 1UL; + page = (struct page *)((unsigned long)page & ~1UL); + pcl->compressed_bvecs[nr].page = page; + spin_unlock(&pcl->obj.lockref.lock); + if (!page) goto out_allocpage; - justfound = (unsigned long)oldpage & 1UL; - page = (struct page *)((unsigned long)oldpage & ~1UL); bvec->bv_page = page; - DBG_BUGON(z_erofs_is_shortlived_page(page)); /* * Handle preallocated cached pages. We tried to allocate such pages @@ -1448,7 +1455,6 @@ repeat: */ if (page->private == Z_EROFS_PREALLOCATED_PAGE) { set_page_private(page, 0); - WRITE_ONCE(zbv->page, page); tocache = true; goto out_tocache; } @@ -1459,9 +1465,9 @@ repeat: * therefore it is impossible for `mapping` to be NULL. */ if (mapping && mapping != mc) { - if (zbv->offset < 0) - bvec->bv_offset = round_up(-zbv->offset, bs); - bvec->bv_len = round_up(zbv->end, bs) - bvec->bv_offset; + if (zbv.offset < 0) + bvec->bv_offset = round_up(-zbv.offset, bs); + bvec->bv_len = round_up(zbv.end, bs) - bvec->bv_offset; return; } @@ -1471,7 +1477,6 @@ repeat: /* the cached page is still in managed cache */ if (page->mapping == mc) { - WRITE_ONCE(zbv->page, page); /* * The cached page is still available but without a valid * `->private` pcluster hint. Let's reconnect them. @@ -1503,11 +1508,15 @@ repeat: put_page(page); out_allocpage: page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL); - if (oldpage != cmpxchg(&zbv->page, oldpage, page)) { + spin_lock(&pcl->obj.lockref.lock); + if (pcl->compressed_bvecs[nr].page) { erofs_pagepool_add(&f->pagepool, page); + spin_unlock(&pcl->obj.lockref.lock); cond_resched(); goto repeat; } + pcl->compressed_bvecs[nr].page = page; + spin_unlock(&pcl->obj.lockref.lock); bvec->bv_page = page; out_tocache: if (!tocache || bs != PAGE_SIZE || @@ -1685,6 +1694,7 @@ submit_bio_retry: if (cur + bvec.bv_len > end) bvec.bv_len = end - cur; + DBG_BUGON(bvec.bv_len < sb->s_blocksize); if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len, bvec.bv_offset)) goto submit_bio_retry; @@ -1785,7 +1795,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, if (PageUptodate(page)) unlock_page(page); else - (void)z_erofs_do_read_page(f, page); + (void)z_erofs_do_read_page(f, page, !!rac); put_page(page); } @@ -1806,7 +1816,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio) f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT; z_erofs_pcluster_readmore(&f, NULL, true); - err = z_erofs_do_read_page(&f, &folio->page); + err = z_erofs_do_read_page(&f, &folio->page, false); z_erofs_pcluster_readmore(&f, NULL, false); z_erofs_pcluster_end(&f); @@ -1847,7 +1857,7 @@ static void z_erofs_readahead(struct readahead_control *rac) folio = head; head = folio_get_private(folio); - err = z_erofs_do_read_page(&f, &folio->page); + err = z_erofs_do_read_page(&f, &folio->page, true); if (err && err != -EINTR) erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu", folio->index, EROFS_I(inode)->nid); diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 522edcbb2ce4..0687f952956c 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -501,7 +501,7 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = mapping->host; struct exfat_inode_info *ei = EXFAT_I(inode); loff_t pos = iocb->ki_pos; - loff_t size = iocb->ki_pos + iov_iter_count(iter); + loff_t size = pos + iov_iter_count(iter); int rw = iov_iter_rw(iter); ssize_t ret; @@ -525,11 +525,10 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter) */ ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block); if (ret < 0) { - if (rw == WRITE) + if (rw == WRITE && ret != -EIOCBQUEUED) exfat_write_failed(mapping, size); - if (ret != -EIOCBQUEUED) - return ret; + return ret; } else size = pos + ret; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ea5b8e57d904..671664fed307 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -340,7 +340,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) } else { folio_unlock(folio); - if (!folio_test_has_hwpoisoned(folio)) + if (!folio_test_hwpoison(folio)) want = nr; else { /* diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 8eec84c651bf..cb3cda1390ad 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -2763,9 +2763,7 @@ static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl) * leafno - the number of the leaf to be updated. * newval - the new value for the leaf. * - * RETURN VALUES: - * 0 - success - * -EIO - i/o error + * RETURN VALUES: none */ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl) { @@ -2792,10 +2790,6 @@ static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl) * get the buddy size (number of words covered) of * the new value. */ - - if ((newval - tp->dmt_budmin) > BUDMIN) - return -EIO; - budsz = BUDSIZE(newval, tp->dmt_budmin); /* try to join. diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 6b211522a13e..1c3dd0ad4660 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -281,44 +281,6 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode, inode->i_gid = attr->gid; } -static void update_gid(struct eventfs_inode *ei, kgid_t gid, int level) -{ - struct eventfs_inode *ei_child; - - /* at most we have events/system/event */ - if (WARN_ON_ONCE(level > 3)) - return; - - ei->attr.gid = gid; - - if (ei->entry_attrs) { - for (int i = 0; i < ei->nr_entries; i++) { - ei->entry_attrs[i].gid = gid; - } - } - - /* - * Only eventfs_inode with dentries are updated, make sure - * all eventfs_inodes are updated. If one of the children - * do not have a dentry, this function must traverse it. - */ - list_for_each_entry_srcu(ei_child, &ei->children, list, - srcu_read_lock_held(&eventfs_srcu)) { - if (!ei_child->dentry) - update_gid(ei_child, gid, level + 1); - } -} - -void eventfs_update_gid(struct dentry *dentry, kgid_t gid) -{ - struct eventfs_inode *ei = dentry->d_fsdata; - int idx; - - idx = srcu_read_lock(&eventfs_srcu); - update_gid(ei, gid, 0); - srcu_read_unlock(&eventfs_srcu, idx); -} - /** * create_file - create a file in the tracefs filesystem * @name: the name of the file to create. diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index 45397df9bb65..91c2bf0b91d9 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -82,7 +82,6 @@ struct inode *tracefs_get_inode(struct super_block *sb); struct dentry *eventfs_start_creating(const char *name, struct dentry *parent); struct dentry *eventfs_failed_creating(struct dentry *dentry); struct dentry *eventfs_end_creating(struct dentry *dentry); -void eventfs_update_gid(struct dentry *dentry, kgid_t gid); void eventfs_set_ei_status_free(struct tracefs_inode *ti, struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 840cd254172d..7118ac28d468 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -77,17 +77,6 @@ enum hid_bpf_attach_flags { int hid_bpf_device_event(struct hid_bpf_ctx *ctx); int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx); -/* Following functions are kfunc that we export to BPF programs */ -/* available everywhere in HID-BPF */ -__u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz); - -/* only available in syscall */ -int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags); -int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, - enum hid_report_type rtype, enum hid_class_request reqtype); -struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id); -void hid_bpf_release_context(struct hid_bpf_ctx *ctx); - /* * Below is HID internal */ diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 185924c56378..76458b6d53da 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -315,9 +315,9 @@ LSM_HOOK(int, 0, socket_getsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_setsockopt, struct socket *sock, int level, int optname) LSM_HOOK(int, 0, socket_shutdown, struct socket *sock, int how) LSM_HOOK(int, 0, socket_sock_rcv_skb, struct sock *sk, struct sk_buff *skb) -LSM_HOOK(int, 0, socket_getpeersec_stream, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_stream, struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) -LSM_HOOK(int, 0, socket_getpeersec_dgram, struct socket *sock, +LSM_HOOK(int, -ENOPROTOOPT, socket_getpeersec_dgram, struct socket *sock, struct sk_buff *skb, u32 *secid) LSM_HOOK(int, 0, sk_alloc_security, struct sock *sk, int family, gfp_t priority) LSM_HOOK(void, LSM_RET_VOID, sk_free_security, struct sock *sk) diff --git a/include/linux/mman.h b/include/linux/mman.h index 40d94411d492..dc7048824be8 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -156,6 +156,7 @@ calc_vm_flag_bits(unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | + _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | arch_calc_vm_flag_bits(flags); } diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 4ed33b127821..a497f189d988 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -2013,9 +2013,9 @@ static inline int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; ms = __pfn_to_section(pfn); - rcu_read_lock(); + rcu_read_lock_sched(); if (!valid_section(ms)) { - rcu_read_unlock(); + rcu_read_unlock_sched(); return 0; } /* @@ -2023,7 +2023,7 @@ static inline int pfn_valid(unsigned long pfn) * the entire section-sized span. */ ret = early_section(ms) || pfn_section_valid(ms, pfn); - rcu_read_unlock(); + rcu_read_unlock_sched(); return ret; } diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e8c350a3ade1..e9f4f845d760 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -186,6 +186,8 @@ struct ip_set_type_variant { /* Return true if "b" set is the same as "a" * according to the create set parameters */ bool (*same_set)(const struct ip_set *a, const struct ip_set *b); + /* Cancel ongoing garbage collectors before destroying the set*/ + void (*cancel_gc)(struct ip_set *set); /* Region-locking is used */ bool region_lock; }; @@ -242,6 +244,8 @@ extern void ip_set_type_unregister(struct ip_set_type *set_type); /* A generic IP set */ struct ip_set { + /* For call_cru in destroy */ + struct rcu_head rcu; /* The name of the set */ char name[IPSET_MAXNAMELEN]; /* Lock protecting the set data */ diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 49c4640027d8..afd40dce40f3 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -46,12 +46,6 @@ struct scm_stat { #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) -#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) -#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) -#define unix_state_lock_nested(s) \ - spin_lock_nested(&unix_sk(s)->lock, \ - SINGLE_DEPTH_NESTING) - /* The AF_UNIX socket */ struct unix_sock { /* WARNING: sk has to be the first member */ @@ -77,6 +71,20 @@ struct unix_sock { #define unix_sk(ptr) container_of_const(ptr, struct unix_sock, sk) #define unix_peer(sk) (unix_sk(sk)->peer) +#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) +#define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) +enum unix_socket_lock_class { + U_LOCK_NORMAL, + U_LOCK_SECOND, /* for double locking, see unix_state_double_lock(). */ + U_LOCK_DIAG, /* used while dumping icons, see sk_diag_dump_icons(). */ +}; + +static inline void unix_state_lock_nested(struct sock *sk, + enum unix_socket_lock_class subclass) +{ + spin_lock_nested(&unix_sk(sk)->lock, subclass); +} + #define peer_wait peer_wq.wait long unix_inq_len(struct sock *sk); diff --git a/include/net/ip.h b/include/net/ip.h index de0c69c57e3c..25cb688bdc62 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -767,7 +767,7 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev); * Functions provided by ip_sockglue.c */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst); void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4e1ea18eb5f0..001226c34621 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1351,6 +1351,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, * @type: stateful object numeric type * @owner: module owner * @maxattr: maximum netlink attribute + * @family: address family for AF-specific object types * @policy: netlink attribute policy */ struct nft_object_type { @@ -1360,6 +1361,7 @@ struct nft_object_type { struct list_head list; u32 type; unsigned int maxattr; + u8 family; struct module *owner; const struct nla_policy *policy; }; diff --git a/init/Kconfig b/init/Kconfig index 8d4e836e1b6b..deda3d14135b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -876,13 +876,13 @@ config CC_NO_ARRAY_BOUNDS bool default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC11_NO_ARRAY_BOUNDS -# Currently, disable -Wstringop-overflow for GCC 11, globally. -config GCC11_NO_STRINGOP_OVERFLOW +# Currently, disable -Wstringop-overflow for GCC globally. +config GCC_NO_STRINGOP_OVERFLOW def_bool y config CC_NO_STRINGOP_OVERFLOW bool - default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_STRINGOP_OVERFLOW + default y if CC_IS_GCC && GCC_NO_STRINGOP_OVERFLOW config CC_STRINGOP_OVERFLOW bool diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 485bb0389b48..929e98c62965 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -537,7 +537,7 @@ retry: } } - ret = __replace_page(vma, vaddr, old_page, new_page); + ret = __replace_page(vma, vaddr & PAGE_MASK, old_page, new_page); if (new_page) put_page(new_page); put_old: diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 46439e3bcec4..b33c3861fbbb 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -1470,8 +1470,10 @@ register_snapshot_trigger(char *glob, struct event_trigger_data *data, struct trace_event_file *file) { - if (tracing_alloc_snapshot_instance(file->tr) != 0) - return 0; + int ret = tracing_alloc_snapshot_instance(file->tr); + + if (ret < 0) + return ret; return register_trigger(glob, data, file); } diff --git a/lib/kunit/device.c b/lib/kunit/device.c index f5371287b375..074c6dd2e36a 100644 --- a/lib/kunit/device.c +++ b/lib/kunit/device.c @@ -45,8 +45,8 @@ int kunit_bus_init(void) int error; kunit_bus_device = root_device_register("kunit"); - if (!kunit_bus_device) - return -ENOMEM; + if (IS_ERR(kunit_bus_device)) + return PTR_ERR(kunit_bus_device); error = bus_register(&kunit_bus_type); if (error) diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 717b9599036b..689fff2b2b10 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -146,6 +146,10 @@ void kunit_free_suite_set(struct kunit_suite_set suite_set) kfree(suite_set.start); } +/* + * Filter and reallocate test suites. Must return the filtered test suites set + * allocated at a valid virtual address or NULL in case of error. + */ struct kunit_suite_set kunit_filter_suites(const struct kunit_suite_set *suite_set, const char *filter_glob, diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index c4259d910356..f7980ef236a3 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -720,7 +720,7 @@ static void kunit_device_cleanup_test(struct kunit *test) long action_was_run = 0; test_device = kunit_device_register(test, "my_device"); - KUNIT_ASSERT_NOT_NULL(test, test_device); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, test_device); /* Add an action to verify cleanup. */ devm_add_action(test_device, test_dev_action, &action_was_run); diff --git a/lib/kunit/test.c b/lib/kunit/test.c index f95d2093a0aa..31a5a992e646 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -17,6 +17,7 @@ #include <linux/panic.h> #include <linux/sched/debug.h> #include <linux/sched.h> +#include <linux/mm.h> #include "debugfs.h" #include "device-impl.h" @@ -801,12 +802,19 @@ static void kunit_module_exit(struct module *mod) }; const char *action = kunit_action(); + /* + * Check if the start address is a valid virtual address to detect + * if the module load sequence has failed and the suite set has not + * been initialized and filtered. + */ + if (!suite_set.start || !virt_addr_valid(suite_set.start)) + return; + if (!action) __kunit_test_suites_exit(mod->kunit_suites, mod->num_kunit_suites); - if (suite_set.start) - kunit_free_suite_set(suite_set); + kunit_free_suite_set(suite_set); } static int kunit_module_notify(struct notifier_block *nb, unsigned long val, @@ -816,12 +824,12 @@ static int kunit_module_notify(struct notifier_block *nb, unsigned long val, switch (val) { case MODULE_STATE_LIVE: + kunit_module_init(mod); break; case MODULE_STATE_GOING: kunit_module_exit(mod); break; case MODULE_STATE_COMING: - kunit_module_init(mod); break; case MODULE_STATE_UNFORMED: break; diff --git a/lib/stackdepot.c b/lib/stackdepot.c index a0be5d05c7f0..5caa1f566553 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) "stackdepot: " fmt +#include <linux/debugfs.h> #include <linux/gfp.h> #include <linux/jhash.h> #include <linux/kernel.h> @@ -21,8 +22,9 @@ #include <linux/list.h> #include <linux/mm.h> #include <linux/mutex.h> -#include <linux/percpu.h> #include <linux/printk.h> +#include <linux/rculist.h> +#include <linux/rcupdate.h> #include <linux/refcount.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -67,12 +69,28 @@ union handle_parts { }; struct stack_record { - struct list_head list; /* Links in hash table or freelist */ + struct list_head hash_list; /* Links in the hash table */ u32 hash; /* Hash in hash table */ u32 size; /* Number of stored frames */ - union handle_parts handle; + union handle_parts handle; /* Constant after initialization */ refcount_t count; - unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ + union { + unsigned long entries[CONFIG_STACKDEPOT_MAX_FRAMES]; /* Frames */ + struct { + /* + * An important invariant of the implementation is to + * only place a stack record onto the freelist iff its + * refcount is zero. Because stack records with a zero + * refcount are never considered as valid, it is safe to + * union @entries and freelist management state below. + * Conversely, as soon as an entry is off the freelist + * and its refcount becomes non-zero, the below must not + * be accessed until being placed back on the freelist. + */ + struct list_head free_list; /* Links in the freelist */ + unsigned long rcu_state; /* RCU cookie */ + }; + }; }; #define DEPOT_STACK_RECORD_SIZE \ @@ -112,8 +130,25 @@ static LIST_HEAD(free_stacks); * yet allocated or if the limit on the number of pools is reached. */ static bool new_pool_required = true; -/* Lock that protects the variables above. */ -static DEFINE_RWLOCK(pool_rwlock); +/* The lock must be held when performing pool or freelist modifications. */ +static DEFINE_RAW_SPINLOCK(pool_lock); + +/* Statistics counters for debugfs. */ +enum depot_counter_id { + DEPOT_COUNTER_ALLOCS, + DEPOT_COUNTER_FREES, + DEPOT_COUNTER_INUSE, + DEPOT_COUNTER_FREELIST_SIZE, + DEPOT_COUNTER_COUNT, +}; +static long counters[DEPOT_COUNTER_COUNT]; +static const char *const counter_names[] = { + [DEPOT_COUNTER_ALLOCS] = "allocations", + [DEPOT_COUNTER_FREES] = "frees", + [DEPOT_COUNTER_INUSE] = "in_use", + [DEPOT_COUNTER_FREELIST_SIZE] = "freelist_size", +}; +static_assert(ARRAY_SIZE(counter_names) == DEPOT_COUNTER_COUNT); static int __init disable_stack_depot(char *str) { @@ -258,14 +293,15 @@ out_unlock: } EXPORT_SYMBOL_GPL(stack_depot_init); -/* Initializes a stack depol pool. */ +/* + * Initializes new stack depot @pool, release all its entries to the freelist, + * and update the list of pools. + */ static void depot_init_pool(void *pool) { int offset; - lockdep_assert_held_write(&pool_rwlock); - - WARN_ON(!list_empty(&free_stacks)); + lockdep_assert_held(&pool_lock); /* Initialize handles and link stack records into the freelist. */ for (offset = 0; offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE; @@ -276,18 +312,36 @@ static void depot_init_pool(void *pool) stack->handle.offset = offset >> DEPOT_STACK_ALIGN; stack->handle.extra = 0; - list_add(&stack->list, &free_stacks); + /* + * Stack traces of size 0 are never saved, and we can simply use + * the size field as an indicator if this is a new unused stack + * record in the freelist. + */ + stack->size = 0; + + INIT_LIST_HEAD(&stack->hash_list); + /* + * Add to the freelist front to prioritize never-used entries: + * required in case there are entries in the freelist, but their + * RCU cookie still belongs to the current RCU grace period + * (there can still be concurrent readers). + */ + list_add(&stack->free_list, &free_stacks); + counters[DEPOT_COUNTER_FREELIST_SIZE]++; } /* Save reference to the pool to be used by depot_fetch_stack(). */ stack_pools[pools_num] = pool; - pools_num++; + + /* Pairs with concurrent READ_ONCE() in depot_fetch_stack(). */ + WRITE_ONCE(pools_num, pools_num + 1); + ASSERT_EXCLUSIVE_WRITER(pools_num); } /* Keeps the preallocated memory to be used for a new stack depot pool. */ static void depot_keep_new_pool(void **prealloc) { - lockdep_assert_held_write(&pool_rwlock); + lockdep_assert_held(&pool_lock); /* * If a new pool is already saved or the maximum number of @@ -310,17 +364,16 @@ static void depot_keep_new_pool(void **prealloc) * number of pools is reached. In either case, take note that * keeping another pool is not required. */ - new_pool_required = false; + WRITE_ONCE(new_pool_required, false); } -/* Updates references to the current and the next stack depot pools. */ -static bool depot_update_pools(void **prealloc) +/* + * Try to initialize a new stack depot pool from either a previous or the + * current pre-allocation, and release all its entries to the freelist. + */ +static bool depot_try_init_pool(void **prealloc) { - lockdep_assert_held_write(&pool_rwlock); - - /* Check if we still have objects in the freelist. */ - if (!list_empty(&free_stacks)) - goto out_keep_prealloc; + lockdep_assert_held(&pool_lock); /* Check if we have a new pool saved and use it. */ if (new_pool) { @@ -329,10 +382,9 @@ static bool depot_update_pools(void **prealloc) /* Take note that we might need a new new_pool. */ if (pools_num < DEPOT_MAX_POOLS) - new_pool_required = true; + WRITE_ONCE(new_pool_required, true); - /* Try keeping the preallocated memory for new_pool. */ - goto out_keep_prealloc; + return true; } /* Bail out if we reached the pool limit. */ @@ -349,12 +401,32 @@ static bool depot_update_pools(void **prealloc) } return false; +} + +/* Try to find next free usable entry. */ +static struct stack_record *depot_pop_free(void) +{ + struct stack_record *stack; + + lockdep_assert_held(&pool_lock); + + if (list_empty(&free_stacks)) + return NULL; + + /* + * We maintain the invariant that the elements in front are least + * recently used, and are therefore more likely to be associated with an + * RCU grace period in the past. Consequently it is sufficient to only + * check the first entry. + */ + stack = list_first_entry(&free_stacks, struct stack_record, free_list); + if (stack->size && !poll_state_synchronize_rcu(stack->rcu_state)) + return NULL; + + list_del(&stack->free_list); + counters[DEPOT_COUNTER_FREELIST_SIZE]--; -out_keep_prealloc: - /* Keep the preallocated memory for a new pool if required. */ - if (*prealloc) - depot_keep_new_pool(prealloc); - return true; + return stack; } /* Allocates a new stack in a stack depot pool. */ @@ -363,19 +435,22 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) { struct stack_record *stack; - lockdep_assert_held_write(&pool_rwlock); + lockdep_assert_held(&pool_lock); - /* Update current and new pools if required and possible. */ - if (!depot_update_pools(prealloc)) + /* This should already be checked by public API entry points. */ + if (WARN_ON_ONCE(!size)) return NULL; /* Check if we have a stack record to save the stack trace. */ - if (list_empty(&free_stacks)) - return NULL; - - /* Get and unlink the first entry from the freelist. */ - stack = list_first_entry(&free_stacks, struct stack_record, list); - list_del(&stack->list); + stack = depot_pop_free(); + if (!stack) { + /* No usable entries on the freelist - try to refill the freelist. */ + if (!depot_try_init_pool(prealloc)) + return NULL; + stack = depot_pop_free(); + if (WARN_ON(!stack)) + return NULL; + } /* Limit number of saved frames to CONFIG_STACKDEPOT_MAX_FRAMES. */ if (size > CONFIG_STACKDEPOT_MAX_FRAMES) @@ -394,38 +469,80 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) */ kmsan_unpoison_memory(stack, DEPOT_STACK_RECORD_SIZE); + counters[DEPOT_COUNTER_ALLOCS]++; + counters[DEPOT_COUNTER_INUSE]++; return stack; } static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle) { + const int pools_num_cached = READ_ONCE(pools_num); union handle_parts parts = { .handle = handle }; void *pool; size_t offset = parts.offset << DEPOT_STACK_ALIGN; struct stack_record *stack; - lockdep_assert_held(&pool_rwlock); + lockdep_assert_not_held(&pool_lock); - if (parts.pool_index > pools_num) { + if (parts.pool_index > pools_num_cached) { WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n", - parts.pool_index, pools_num, handle); + parts.pool_index, pools_num_cached, handle); return NULL; } pool = stack_pools[parts.pool_index]; - if (!pool) + if (WARN_ON(!pool)) return NULL; stack = pool + offset; + if (WARN_ON(!refcount_read(&stack->count))) + return NULL; + return stack; } /* Links stack into the freelist. */ static void depot_free_stack(struct stack_record *stack) { - lockdep_assert_held_write(&pool_rwlock); + unsigned long flags; + + lockdep_assert_not_held(&pool_lock); - list_add(&stack->list, &free_stacks); + raw_spin_lock_irqsave(&pool_lock, flags); + printk_deferred_enter(); + + /* + * Remove the entry from the hash list. Concurrent list traversal may + * still observe the entry, but since the refcount is zero, this entry + * will no longer be considered as valid. + */ + list_del_rcu(&stack->hash_list); + + /* + * Due to being used from constrained contexts such as the allocators, + * NMI, or even RCU itself, stack depot cannot rely on primitives that + * would sleep (such as synchronize_rcu()) or recursively call into + * stack depot again (such as call_rcu()). + * + * Instead, get an RCU cookie, so that we can ensure this entry isn't + * moved onto another list until the next grace period, and concurrent + * RCU list traversal remains safe. + */ + stack->rcu_state = get_state_synchronize_rcu(); + + /* + * Add the entry to the freelist tail, so that older entries are + * considered first - their RCU cookie is more likely to no longer be + * associated with the current grace period. + */ + list_add_tail(&stack->free_list, &free_stacks); + + counters[DEPOT_COUNTER_FREELIST_SIZE]++; + counters[DEPOT_COUNTER_FREES]++; + counters[DEPOT_COUNTER_INUSE]--; + + printk_deferred_exit(); + raw_spin_unlock_irqrestore(&pool_lock, flags); } /* Calculates the hash for a stack. */ @@ -453,22 +570,52 @@ int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2, /* Finds a stack in a bucket of the hash table. */ static inline struct stack_record *find_stack(struct list_head *bucket, - unsigned long *entries, int size, - u32 hash) + unsigned long *entries, int size, + u32 hash, depot_flags_t flags) { - struct list_head *pos; - struct stack_record *found; + struct stack_record *stack, *ret = NULL; + + /* + * Stack depot may be used from instrumentation that instruments RCU or + * tracing itself; use variant that does not call into RCU and cannot be + * traced. + * + * Note: Such use cases must take care when using refcounting to evict + * unused entries, because the stack record free-then-reuse code paths + * do call into RCU. + */ + rcu_read_lock_sched_notrace(); - lockdep_assert_held(&pool_rwlock); + list_for_each_entry_rcu(stack, bucket, hash_list) { + if (stack->hash != hash || stack->size != size) + continue; + + /* + * This may race with depot_free_stack() accessing the freelist + * management state unioned with @entries. The refcount is zero + * in that case and the below refcount_inc_not_zero() will fail. + */ + if (data_race(stackdepot_memcmp(entries, stack->entries, size))) + continue; + + /* + * Try to increment refcount. If this succeeds, the stack record + * is valid and has not yet been freed. + * + * If STACK_DEPOT_FLAG_GET is not used, it is undefined behavior + * to then call stack_depot_put() later, and we can assume that + * a stack record is never placed back on the freelist. + */ + if ((flags & STACK_DEPOT_FLAG_GET) && !refcount_inc_not_zero(&stack->count)) + continue; - list_for_each(pos, bucket) { - found = list_entry(pos, struct stack_record, list); - if (found->hash == hash && - found->size == size && - !stackdepot_memcmp(entries, found->entries, size)) - return found; + ret = stack; + break; } - return NULL; + + rcu_read_unlock_sched_notrace(); + + return ret; } depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, @@ -482,7 +629,6 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, struct page *page = NULL; void *prealloc = NULL; bool can_alloc = depot_flags & STACK_DEPOT_FLAG_CAN_ALLOC; - bool need_alloc = false; unsigned long flags; u32 hash; @@ -505,31 +651,16 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, hash = hash_stack(entries, nr_entries); bucket = &stack_table[hash & stack_hash_mask]; - read_lock_irqsave(&pool_rwlock, flags); - printk_deferred_enter(); - - /* Fast path: look the stack trace up without full locking. */ - found = find_stack(bucket, entries, nr_entries, hash); - if (found) { - if (depot_flags & STACK_DEPOT_FLAG_GET) - refcount_inc(&found->count); - printk_deferred_exit(); - read_unlock_irqrestore(&pool_rwlock, flags); + /* Fast path: look the stack trace up without locking. */ + found = find_stack(bucket, entries, nr_entries, hash, depot_flags); + if (found) goto exit; - } - - /* Take note if another stack pool needs to be allocated. */ - if (new_pool_required) - need_alloc = true; - - printk_deferred_exit(); - read_unlock_irqrestore(&pool_rwlock, flags); /* * Allocate memory for a new pool if required now: * we won't be able to do that under the lock. */ - if (unlikely(can_alloc && need_alloc)) { + if (unlikely(can_alloc && READ_ONCE(new_pool_required))) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic @@ -543,31 +674,36 @@ depot_stack_handle_t stack_depot_save_flags(unsigned long *entries, prealloc = page_address(page); } - write_lock_irqsave(&pool_rwlock, flags); + raw_spin_lock_irqsave(&pool_lock, flags); printk_deferred_enter(); - found = find_stack(bucket, entries, nr_entries, hash); + /* Try to find again, to avoid concurrently inserting duplicates. */ + found = find_stack(bucket, entries, nr_entries, hash, depot_flags); if (!found) { struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc); if (new) { - list_add(&new->list, bucket); + /* + * This releases the stack record into the bucket and + * makes it visible to readers in find_stack(). + */ + list_add_rcu(&new->hash_list, bucket); found = new; } - } else { - if (depot_flags & STACK_DEPOT_FLAG_GET) - refcount_inc(&found->count); + } + + if (prealloc) { /* - * Stack depot already contains this stack trace, but let's - * keep the preallocated memory for future. + * Either stack depot already contains this stack trace, or + * depot_alloc_stack() did not consume the preallocated memory. + * Try to keep the preallocated memory for future. */ - if (prealloc) - depot_keep_new_pool(&prealloc); + depot_keep_new_pool(&prealloc); } printk_deferred_exit(); - write_unlock_irqrestore(&pool_rwlock, flags); + raw_spin_unlock_irqrestore(&pool_lock, flags); exit: if (prealloc) { /* Stack depot didn't use this memory, free it. */ @@ -592,7 +728,6 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries) { struct stack_record *stack; - unsigned long flags; *entries = NULL; /* @@ -604,13 +739,13 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, if (!handle || stack_depot_disabled) return 0; - read_lock_irqsave(&pool_rwlock, flags); - printk_deferred_enter(); - stack = depot_fetch_stack(handle); - - printk_deferred_exit(); - read_unlock_irqrestore(&pool_rwlock, flags); + /* + * Should never be NULL, otherwise this is a use-after-put (or just a + * corrupt handle). + */ + if (WARN(!stack, "corrupt handle or use after stack_depot_put()")) + return 0; *entries = stack->entries; return stack->size; @@ -620,29 +755,20 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch); void stack_depot_put(depot_stack_handle_t handle) { struct stack_record *stack; - unsigned long flags; if (!handle || stack_depot_disabled) return; - write_lock_irqsave(&pool_rwlock, flags); - printk_deferred_enter(); - stack = depot_fetch_stack(handle); - if (WARN_ON(!stack)) - goto out; - - if (refcount_dec_and_test(&stack->count)) { - /* Unlink stack from the hash table. */ - list_del(&stack->list); + /* + * Should always be able to find the stack record, otherwise this is an + * unbalanced put attempt (or corrupt handle). + */ + if (WARN(!stack, "corrupt handle or unbalanced stack_depot_put()")) + return; - /* Free stack. */ + if (refcount_dec_and_test(&stack->count)) depot_free_stack(stack); - } - -out: - printk_deferred_exit(); - write_unlock_irqrestore(&pool_rwlock, flags); } EXPORT_SYMBOL_GPL(stack_depot_put); @@ -690,3 +816,30 @@ unsigned int stack_depot_get_extra_bits(depot_stack_handle_t handle) return parts.extra; } EXPORT_SYMBOL(stack_depot_get_extra_bits); + +static int stats_show(struct seq_file *seq, void *v) +{ + /* + * data race ok: These are just statistics counters, and approximate + * statistics are ok for debugging. + */ + seq_printf(seq, "pools: %d\n", data_race(pools_num)); + for (int i = 0; i < DEPOT_COUNTER_COUNT; i++) + seq_printf(seq, "%s: %ld\n", counter_names[i], data_race(counters[i])); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(stats); + +static int depot_debugfs_init(void) +{ + struct dentry *dir; + + if (stack_depot_disabled) + return 0; + + dir = debugfs_create_dir("stackdepot", NULL); + debugfs_create_file("stats", 0444, dir, NULL, &stats_fops); + return 0; +} +late_initcall(depot_debugfs_init); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 94ef5c02b459..94c958f7ebb5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -37,6 +37,7 @@ #include <linux/page_owner.h> #include <linux/sched/sysctl.h> #include <linux/memory-tiers.h> +#include <linux/compat.h> #include <asm/tlb.h> #include <asm/pgalloc.h> @@ -809,7 +810,10 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, { loff_t off_end = off + len; loff_t off_align = round_up(off, size); - unsigned long len_pad, ret; + unsigned long len_pad, ret, off_sub; + + if (IS_ENABLED(CONFIG_32BIT) || in_compat_syscall()) + return 0; if (off_end <= off_align || (off_end - off_align) < size) return 0; @@ -835,7 +839,13 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, if (ret == addr) return addr; - ret += (off - ret) & (size - 1); + off_sub = (off - ret) & (size - 1); + + if (current->mm->get_unmapped_area == arch_get_unmapped_area_topdown && + !off_sub) + return ret + size; + + ret += off_sub; return ret; } @@ -2437,7 +2447,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, page = pmd_page(old_pmd); folio = page_folio(page); if (!folio_test_dirty(folio) && pmd_dirty(old_pmd)) - folio_set_dirty(folio); + folio_mark_dirty(folio); if (!folio_test_referenced(folio) && pmd_young(old_pmd)) folio_set_referenced(folio); folio_remove_rmap_pmd(folio, page, vma); @@ -3563,7 +3573,7 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, } if (pmd_dirty(pmdval)) - folio_set_dirty(folio); + folio_mark_dirty(folio); if (pmd_write(pmdval)) entry = make_writable_migration_entry(page_to_pfn(page)); else if (anon_exclusive) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e4c8735e7c85..46d8d02114cf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2623,8 +2623,9 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg, } /* - * Scheduled by try_charge() to be executed from the userland return path - * and reclaims memory over the high limit. + * Reclaims memory over the high limit. Called directly from + * try_charge() (context permitting), as well as from the userland + * return path where reclaim is always able to block. */ void mem_cgroup_handle_over_high(gfp_t gfp_mask) { @@ -2644,6 +2645,17 @@ void mem_cgroup_handle_over_high(gfp_t gfp_mask) retry_reclaim: /* + * Bail if the task is already exiting. Unlike memory.max, + * memory.high enforcement isn't as strict, and there is no + * OOM killer involved, which means the excess could already + * be much bigger (and still growing) than it could for + * memory.max; the dying task could get stuck in fruitless + * reclaim for a long time, which isn't desirable. + */ + if (task_is_dying()) + goto out; + + /* * The allocating task should reclaim at least the batch size, but for * subsequent retries we only want to do what's necessary to prevent oom * or breaching resource isolation. @@ -2693,6 +2705,9 @@ retry_reclaim: } /* + * Reclaim didn't manage to push usage below the limit, slow + * this allocating task down. + * * If we exit early, we're guaranteed to die (since * schedule_timeout_killable sets TASK_KILLABLE). This means we don't * need to account for any ill-begotten jiffies to pay them off later. @@ -2887,11 +2902,17 @@ done_restock: } } while ((memcg = parent_mem_cgroup(memcg))); + /* + * Reclaim is set up above to be called from the userland + * return path. But also attempt synchronous reclaim to avoid + * excessive overrun while the task is still inside the + * kernel. If this is successful, the return path will see it + * when it rechecks the overage and simply bail out. + */ if (current->memcg_nr_pages_over_high > MEMCG_CHARGE_BATCH && !(current->flags & PF_MEMALLOC) && - gfpflags_allow_blocking(gfp_mask)) { + gfpflags_allow_blocking(gfp_mask)) mem_cgroup_handle_over_high(gfp_mask); - } return 0; } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 4f9b61f4a668..636280d04008 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -982,7 +982,7 @@ static bool has_extra_refcount(struct page_state *ps, struct page *p, int count = page_count(p) - 1; if (extra_pins) - count -= 1; + count -= folio_nr_pages(page_folio(p)); if (count > 0) { pr_err("%#lx: %s still referenced by %d users\n", diff --git a/mm/memory.c b/mm/memory.c index 7e1f4849463a..89bcae0b224d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1464,7 +1464,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, delay_rmap = 0; if (!folio_test_anon(folio)) { if (pte_dirty(ptent)) { - folio_set_dirty(folio); + folio_mark_dirty(folio); if (tlb_delay_rmap(tlb)) { delay_rmap = 1; force_flush = 1; diff --git a/mm/mmap.c b/mm/mmap.c index b78e83d351d2..d89770eaab6b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1825,15 +1825,17 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, /* * mmap_region() will call shmem_zero_setup() to create a file, * so use shmem's get_unmapped_area in case it can be huge. - * do_mmap() will clear pgoff, so match alignment. */ - pgoff = 0; get_area = shmem_get_unmapped_area; } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { /* Ensures that larger anonymous mappings are THP aligned. */ get_area = thp_get_unmapped_area; } + /* Always treat pgoff as zero for anonymous memory. */ + if (!file) + pgoff = 0; + addr = get_area(file, addr, len, pgoff, flags); if (IS_ERR_VALUE(addr)) return addr; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cd4e4ae77c40..02147b61712b 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1638,7 +1638,7 @@ static inline void wb_dirty_limits(struct dirty_throttle_control *dtc) */ dtc->wb_thresh = __wb_calc_thresh(dtc); dtc->wb_bg_thresh = dtc->thresh ? - div_u64((u64)dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; + div64_u64(dtc->wb_thresh * dtc->bg_thresh, dtc->thresh) : 0; /* * In order to avoid the stacked BDI deadlock we need diff --git a/mm/readahead.c b/mm/readahead.c index 23620c57c122..2648ec4f0494 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -469,7 +469,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, if (!folio) return -ENOMEM; - mark = round_up(mark, 1UL << order); + mark = round_down(mark, 1UL << order); if (index == mark) folio_set_readahead(folio); err = filemap_add_folio(ractl->mapping, folio, index, gfp); @@ -575,7 +575,7 @@ static void ondemand_readahead(struct readahead_control *ractl, * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - expected = round_up(ra->start + ra->size - ra->async_size, + expected = round_down(ra->start + ra->size - ra->async_size, 1UL << order); if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 20e3b0d9cf7e..75fcf1f783bc 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -357,6 +357,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb( unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, uffd_flags_t flags) { struct mm_struct *dst_mm = dst_vma->vm_mm; @@ -472,6 +473,15 @@ retry: goto out; } mmap_read_lock(dst_mm); + /* + * If memory mappings are changing because of non-cooperative + * operation (e.g. mremap) running in parallel, bail out and + * request the user to retry later + */ + if (mmap_changing && atomic_read(mmap_changing)) { + err = -EAGAIN; + break; + } dst_vma = NULL; goto retry; @@ -506,6 +516,7 @@ extern ssize_t mfill_atomic_hugetlb(struct vm_area_struct *dst_vma, unsigned long dst_start, unsigned long src_start, unsigned long len, + atomic_t *mmap_changing, uffd_flags_t flags); #endif /* CONFIG_HUGETLB_PAGE */ @@ -622,8 +633,8 @@ retry: * If this is a HUGETLB vma, pass off to appropriate routine */ if (is_vm_hugetlb_page(dst_vma)) - return mfill_atomic_hugetlb(dst_vma, dst_start, - src_start, len, flags); + return mfill_atomic_hugetlb(dst_vma, dst_start, src_start, + len, mmap_changing, flags); if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) goto out_unlock; diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index d982daea8329..14088c4ff2f6 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -2175,6 +2175,7 @@ void batadv_mcast_free(struct batadv_priv *bat_priv) cancel_delayed_work_sync(&bat_priv->mcast.work); batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2); + batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST_TRACKER, 1); batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2); /* safely calling outside of worker, as worker was canceled above */ @@ -2198,6 +2199,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig) BATADV_MCAST_WANT_NO_RTR4); batadv_mcast_want_rtr6_update(bat_priv, orig, BATADV_MCAST_WANT_NO_RTR6); + batadv_mcast_have_mc_ptype_update(bat_priv, orig, + BATADV_MCAST_HAVE_MC_PTYPE_CAPA); spin_unlock_bh(&orig->mcast_handler_lock); } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d7d021af1029..2d7b73242958 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1762,6 +1762,10 @@ static void br_ip6_multicast_querier_expired(struct timer_list *t) } #endif +static void br_multicast_query_delay_expired(struct timer_list *t) +{ +} + static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx, struct br_ip *ip, struct sk_buff *skb) @@ -3198,7 +3202,7 @@ br_multicast_update_query_timer(struct net_bridge_mcast *brmctx, unsigned long max_delay) { if (!timer_pending(&query->timer)) - query->delay_time = jiffies + max_delay; + mod_timer(&query->delay_timer, jiffies + max_delay); mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval); } @@ -4041,13 +4045,11 @@ void br_multicast_ctx_init(struct net_bridge *br, brmctx->multicast_querier_interval = 255 * HZ; brmctx->multicast_membership_interval = 260 * HZ; - brmctx->ip4_other_query.delay_time = 0; brmctx->ip4_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock); brmctx->multicast_igmp_version = 2; #if IS_ENABLED(CONFIG_IPV6) brmctx->multicast_mld_version = 1; - brmctx->ip6_other_query.delay_time = 0; brmctx->ip6_querier.port_ifidx = 0; seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock); #endif @@ -4056,6 +4058,8 @@ void br_multicast_ctx_init(struct net_bridge *br, br_ip4_multicast_local_router_expired, 0); timer_setup(&brmctx->ip4_other_query.timer, br_ip4_multicast_querier_expired, 0); + timer_setup(&brmctx->ip4_other_query.delay_timer, + br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip4_own_query.timer, br_ip4_multicast_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) @@ -4063,6 +4067,8 @@ void br_multicast_ctx_init(struct net_bridge *br, br_ip6_multicast_local_router_expired, 0); timer_setup(&brmctx->ip6_other_query.timer, br_ip6_multicast_querier_expired, 0); + timer_setup(&brmctx->ip6_other_query.delay_timer, + br_multicast_query_delay_expired, 0); timer_setup(&brmctx->ip6_own_query.timer, br_ip6_multicast_query_expired, 0); #endif @@ -4197,10 +4203,12 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx) { del_timer_sync(&brmctx->ip4_mc_router_timer); del_timer_sync(&brmctx->ip4_other_query.timer); + del_timer_sync(&brmctx->ip4_other_query.delay_timer); del_timer_sync(&brmctx->ip4_own_query.timer); #if IS_ENABLED(CONFIG_IPV6) del_timer_sync(&brmctx->ip6_mc_router_timer); del_timer_sync(&brmctx->ip6_other_query.timer); + del_timer_sync(&brmctx->ip6_other_query.delay_timer); del_timer_sync(&brmctx->ip6_own_query.timer); #endif } @@ -4643,13 +4651,15 @@ int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val) max_delay = brmctx->multicast_query_response_interval; if (!timer_pending(&brmctx->ip4_other_query.timer)) - brmctx->ip4_other_query.delay_time = jiffies + max_delay; + mod_timer(&brmctx->ip4_other_query.delay_timer, + jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) if (!timer_pending(&brmctx->ip6_other_query.timer)) - brmctx->ip6_other_query.delay_time = jiffies + max_delay; + mod_timer(&brmctx->ip6_other_query.delay_timer, + jiffies + max_delay); br_multicast_start_querier(brmctx, &brmctx->ip6_own_query); #endif diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b0a92c344722..86ea5e6689b5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -78,7 +78,7 @@ struct bridge_mcast_own_query { /* other querier */ struct bridge_mcast_other_query { struct timer_list timer; - unsigned long delay_time; + struct timer_list delay_timer; }; /* selected querier */ @@ -1159,7 +1159,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx, own_querier_enabled = false; } - return time_is_before_jiffies(querier->delay_time) && + return !timer_pending(&querier->delay_timer) && (own_querier_enabled || timer_pending(&querier->timer)); } diff --git a/net/devlink/port.c b/net/devlink/port.c index 62e54e152ecf..78592912f657 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -674,7 +674,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port, return -EOPNOTSUPP; } if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) { - NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR], + NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_STATE], "Function does not support state setting"); return -EOPNOTSUPP; } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 7ceb9ac6e730..9d71b66183da 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -308,7 +308,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "HSR: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n"); return; } @@ -355,7 +355,7 @@ static void send_prp_supervision_frame(struct hsr_port *master, skb = hsr_init_skb(master); if (!skb) { - WARN_ONCE(1, "PRP: Could not send supervision frame\n"); + netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n"); return; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b06f678b03a1..41537d18eecf 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1287,6 +1287,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, if (unlikely(!rt)) return -EFAULT; + cork->fragsize = ip_sk_use_pmtu(sk) ? + dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); + + if (!inetdev_valid_mtu(cork->fragsize)) + return -ENETUNREACH; + /* * setup for corking. */ @@ -1303,12 +1309,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->addr = ipc->addr; } - cork->fragsize = ip_sk_use_pmtu(sk) ? - dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); - - if (!inetdev_valid_mtu(cork->fragsize)) - return -ENETUNREACH; - cork->gso_size = ipc->gso_size; cork->dst = &rt->dst; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 7aa9dc0e6760..21d2ffa919e9 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1363,12 +1363,13 @@ e_inval: * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer + * @drop_dst: if true, drops skb dst * * To support IP_CMSG_PKTINFO option, we store rt_iif and specific * destination in skb->cb[] before dst drop. * This way, receiver doesn't make cache line misses to read rtable. */ -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst) { struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); bool prepare = inet_test_bit(PKTINFO, sk) || @@ -1397,7 +1398,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - skb_dst_drop(skb); + if (drop_dst) + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d6f59531b3a..362229836510 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1073,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt, msg = (struct igmpmsg *)skb_network_header(skb); msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; - ipv4_pktinfo_prepare(mroute_sk, pkt); + ipv4_pktinfo_prepare(mroute_sk, pkt, false); memcpy(skb->cb, pkt->cb, sizeof(skb->cb)); /* Add our header */ igmp = skb_put(skb, sizeof(struct igmphdr)); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 27da9d7294c0..aea89326c697 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Charge it to the socket. */ - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_prepare(sk, skb, true); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { kfree_skb_reason(skb, reason); return NET_RX_DROP; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a1c6de385cce..7e2481b9eae1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1786,7 +1786,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, static bool can_map_frag(const skb_frag_t *frag) { - return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag); + struct page *page; + + if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag)) + return false; + + page = skb_frag_page(frag); + + if (PageCompound(page) || page->mapping) + return false; + + return true; } static int find_next_mappable_frag(const skb_frag_t *frag, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 148ffb007969..f631b0a21af4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2169,7 +2169,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_csum_pull_header(skb); - ipv4_pktinfo_prepare(sk, skb); + ipv4_pktinfo_prepare(sk, skb, true); return __udp_queue_rcv_skb(sk, skb); csum_error: diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 507a8353a6bd..c008d21925d7 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { EXPORT_SYMBOL_GPL(ipv6_stub); /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ -const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8) + = IN6ADDR_LOOPBACK_INIT; EXPORT_SYMBOL(in6addr_loopback); -const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8) + = IN6ADDR_ANY_INIT; EXPORT_SYMBOL(in6addr_any); -const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_linklocal_allnodes); -const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_linklocal_allrouters); -const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; +const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allnodes); -const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_interfacelocal_allrouters); -const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; +const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8) + = IN6ADDR_SITELOCAL_ALLROUTERS_INIT; EXPORT_SYMBOL(in6addr_sitelocal_allrouters); static void snmp6_free_dev(struct inet6_dev *idev) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 46c19bd48990..9bbabf750a21 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -796,8 +796,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, struct sk_buff *skb), bool log_ecn_err) { - const struct ipv6hdr *ipv6h = ipv6_hdr(skb); - int err; + const struct ipv6hdr *ipv6h; + int nh, err; if ((!(tpi->flags & TUNNEL_CSUM) && (tunnel->parms.i_flags & TUNNEL_CSUM)) || @@ -829,7 +829,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, goto drop; } - ipv6h = ipv6_hdr(skb); skb->protocol = eth_type_trans(skb, tunnel->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } else { @@ -837,7 +836,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_reset_mac_header(skb); } + /* Save offset of outer header relative to skb->head, + * because we are going to reset the network header to the inner header + * and might change skb->head. + */ + nh = skb_network_header(skb) - skb->head; + skb_reset_network_header(skb); + + if (!pskb_inet_may_pull(skb)) { + DEV_STATS_INC(tunnel->dev, rx_length_errors); + DEV_STATS_INC(tunnel->dev, rx_errors); + goto drop; + } + + /* Get the outer header. */ + ipv6h = (struct ipv6hdr *)(skb->head + nh); + memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); __skb_tunnel_rx(skb, tunnel->dev, tunnel->net); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 20551cfb7da6..fde1140d899e 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -226,6 +226,8 @@ static int llc_ui_release(struct socket *sock) } netdev_put(llc->dev, &llc->dev_tracker); sock_put(sk); + sock_orphan(sk); + sock->sk = NULL; llc_sk_free(sk); out: return 0; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3ed4709a7509..028e8b473626 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2314,9 +2314,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk) if (__mptcp_check_fallback(msk)) return false; - if (tcp_rtx_and_write_queues_empty(sk)) - return false; - /* the closing socket has some data untransmitted and/or unacked: * some data in the mptcp rtx queue has not really xmitted yet. * keep it simple and re-inject the whole mptcp level rtx queue diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 21f7860e8fa1..cb48a2b9cb9f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -30,6 +30,7 @@ #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype MTYPE #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) @@ -59,9 +60,6 @@ mtype_destroy(struct ip_set *set) { struct mtype *map = set->data; - if (SET_WITH_TIMEOUT(set)) - del_timer_sync(&map->gc); - if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); ip_set_free(map->members); @@ -290,6 +288,15 @@ mtype_gc(struct timer_list *t) add_timer(&map->gc); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct mtype *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + del_timer_sync(&map->gc); +} + static const struct ip_set_type_variant mtype = { .kadt = mtype_kadt, .uadt = mtype_uadt, @@ -303,6 +310,7 @@ static const struct ip_set_type_variant mtype = { .head = mtype_head, .list = mtype_list, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, }; #endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 4c133e06be1d..bcaad9c009fe 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1182,6 +1182,14 @@ ip_set_destroy_set(struct ip_set *set) kfree(set); } +static void +ip_set_destroy_set_rcu(struct rcu_head *head) +{ + struct ip_set *set = container_of(head, struct ip_set, rcu); + + ip_set_destroy_set(set); +} + static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { @@ -1193,8 +1201,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); /* Commands are serialized and references are * protected by the ip_set_ref_lock. @@ -1206,8 +1212,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * counter, so if it's already zero, we can proceed * without holding the lock. */ - read_lock_bh(&ip_set_ref_lock); if (!attr[IPSET_ATTR_SETNAME]) { + /* Must wait for flush to be really finished in list:set */ + rcu_barrier(); + read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { @@ -1221,6 +1229,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, s = ip_set(inst, i); if (s) { ip_set(inst, i) = NULL; + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); ip_set_destroy_set(s); } } @@ -1228,6 +1238,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, inst->is_destroyed = false; } else { u32 flags = flag_exist(info->nlh); + u16 features = 0; + + read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { @@ -1238,10 +1251,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, ret = -IPSET_ERR_BUSY; goto out; } + features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); - - ip_set_destroy_set(s); + if (features & IPSET_TYPE_NAME) { + /* Must wait for flush to be really finished */ + rcu_barrier(); + } + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); + call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: @@ -1394,9 +1413,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); - /* Make sure all readers of the old set pointers are completed. */ - synchronize_rcu(); - return 0; } @@ -2409,8 +2425,11 @@ ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - unregister_pernet_subsys(&ip_set_net_ops); + + /* Wait for call_rcu() in destroy */ + rcu_barrier(); + pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index cbf80da9a01c..1136510521a8 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -222,6 +222,7 @@ static const union nf_inet_addr zeromask = {}; #undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init +#undef mtype_cancel_gc #undef mtype_variant #undef mtype_data_match @@ -266,6 +267,7 @@ static const union nf_inet_addr zeromask = {}; #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) +#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) @@ -450,9 +452,6 @@ mtype_destroy(struct ip_set *set) struct htype *h = set->data; struct list_head *l, *lt; - if (SET_WITH_TIMEOUT(set)) - cancel_delayed_work_sync(&h->gc.dwork); - mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true); list_for_each_safe(l, lt, &h->ad) { list_del(l); @@ -599,6 +598,15 @@ mtype_gc_init(struct htable_gc *gc) queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } +static void +mtype_cancel_gc(struct ip_set *set) +{ + struct htype *h = set->data; + + if (SET_WITH_TIMEOUT(set)) + cancel_delayed_work_sync(&h->gc.dwork); +} + static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags); @@ -1441,6 +1449,7 @@ static const struct ip_set_type_variant mtype_variant = { .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, + .cancel_gc = mtype_cancel_gc, .region_lock = true, }; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index e162636525cf..6c3f28bc59b3 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set) struct list_set *map = set->data; struct set_elem *e, *n; - if (SET_WITH_TIMEOUT(set)) - timer_shutdown_sync(&map->gc); - list_for_each_entry_safe(e, n, &map->members, list) { list_del(&e->list); ip_set_put_byindex(map->net, e->id); @@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b) a->extensions == b->extensions; } +static void +list_set_cancel_gc(struct ip_set *set) +{ + struct list_set *map = set->data; + + if (SET_WITH_TIMEOUT(set)) + timer_shutdown_sync(&map->gc); +} + static const struct ip_set_type_variant set_variant = { .kadt = list_set_kadt, .uadt = list_set_uadt, @@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = { .head = list_set_head, .list = list_set_list, .same_set = list_set_same_set, + .cancel_gc = list_set_cancel_gc, }; static void diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c6bd533983c1..4cc97f971264 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; - } else { + } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) { /* If it is a shutdown ack OOTB packet, we expect a return shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ pr_debug("Setting vtag %x for new conn OOTB\n", diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index e573be5afde7..ae493599a3ef 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, - u32 end, u32 win) + u32 end, u32 win, + enum ip_conntrack_dir dir) { /* SYN-ACK in reply to a SYN * or SYN from reply direction in simultaneous open. @@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, * Both sides must send the Window Scale option * to enable window scaling in either direction. */ - if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && + if (dir == IP_CT_DIR_REPLY && + !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) { sender->td_scale = 0; receiver->td_scale = 0; @@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, if (tcph->syn) { tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (!tcph->ack) /* Simultaneous open */ return NFCT_TCP_ACCEPT; @@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, */ tcp_init_sender(sender, receiver, skb, dataoff, tcph, - end, win); + end, win, dir); if (dir == IP_CT_DIR_REPLY && !tcph->ack) return NFCT_TCP_ACCEPT; diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8cc52d2bd31b..e16f158388bb 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -193,11 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type) return; } - BUG_ON(loggers[pf][type] == NULL); - rcu_read_lock(); logger = rcu_dereference(loggers[pf][type]); - module_put(logger->me); + if (!logger) + WARN_ON_ONCE(1); + else + module_put(logger->me); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_logger_put); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c537104411e7..fc016befb46f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7551,11 +7551,15 @@ nla_put_failure: return -1; } -static const struct nft_object_type *__nft_obj_type_get(u32 objtype) +static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family) { const struct nft_object_type *type; list_for_each_entry(type, &nf_tables_objects, list) { + if (type->family != NFPROTO_UNSPEC && + type->family != family) + continue; + if (objtype == type->type) return type; } @@ -7563,11 +7567,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype) } static const struct nft_object_type * -nft_obj_type_get(struct net *net, u32 objtype) +nft_obj_type_get(struct net *net, u32 objtype, u8 family) { const struct nft_object_type *type; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (type != NULL && try_module_get(type->owner)) return type; @@ -7660,7 +7664,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; - type = __nft_obj_type_get(objtype); + type = __nft_obj_type_get(objtype, family); if (WARN_ON_ONCE(!type)) return -ENOENT; @@ -7674,7 +7678,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, if (!nft_use_inc(&table->use)) return -EMFILE; - type = nft_obj_type_get(net, objtype); + type = nft_obj_type_get(net, objtype, family); if (IS_ERR(type)) { err = PTR_ERR(type); goto err_type; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 86bb9d7797d9..aac98a3c966e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1250,7 +1250,31 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_EXPECT_L3PROTO]) priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO])); + switch (priv->l3num) { + case NFPROTO_IPV4: + case NFPROTO_IPV6: + if (priv->l3num != ctx->family) + return -EINVAL; + + fallthrough; + case NFPROTO_INET: + break; + default: + return -EOPNOTSUPP; + } + priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]); + switch (priv->l4proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_DCCP: + case IPPROTO_SCTP: + break; + default: + return -EOPNOTSUPP; + } + priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]); priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]); priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]); diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 9f21953c7433..f735d79d8be5 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = { static struct nft_object_type nft_tunnel_obj_type __read_mostly = { .type = NFT_OBJECT_TUNNEL, + .family = NFPROTO_NETDEV, .ops = &nft_tunnel_obj_ops, .maxattr = NFTA_TUNNEL_KEY_MAX, .policy = nft_tunnel_key_policy, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 97348cedb16b..cdad47b140fa 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1208,6 +1208,10 @@ void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); nci_hci_deallocate(ndev); + + /* drop partial rx data packet if present */ + if (ndev->rx_data_reassembly) + kfree_skb(ndev->rx_data_reassembly); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 95cc95458e2d..e4c858411207 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1877,9 +1877,15 @@ static bool smcd_lgr_match(struct smc_link_group *lgr, struct smcd_dev *smcismdev, struct smcd_gid *peer_gid) { - return lgr->peer_gid.gid == peer_gid->gid && lgr->smcd == smcismdev && - smc_ism_is_virtual(smcismdev) ? - (lgr->peer_gid.gid_ext == peer_gid->gid_ext) : 1; + if (lgr->peer_gid.gid != peer_gid->gid || + lgr->smcd != smcismdev) + return false; + + if (smc_ism_is_virtual(smcismdev) && + lgr->peer_gid.gid_ext != peer_gid->gid_ext) + return false; + + return true; } /* create a new SMC connection (and a new link group if necessary) */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index f60c93e5a25d..b969e505c7b7 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1598,10 +1598,10 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp) /* Finally, send the reply synchronously */ if (rqstp->bc_to_initval > 0) { timeout.to_initval = rqstp->bc_to_initval; - timeout.to_retries = rqstp->bc_to_initval; + timeout.to_retries = rqstp->bc_to_retries; } else { timeout.to_initval = req->rq_xprt->timeout->to_initval; - timeout.to_initval = req->rq_xprt->timeout->to_retries; + timeout.to_retries = req->rq_xprt->timeout->to_retries; } memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); task = rpc_run_bc_task(req, &timeout); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ac1f2bc18fc9..30b178ebba60 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1344,13 +1344,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2) unix_state_lock(sk1); return; } - if (sk1 < sk2) { - unix_state_lock(sk1); - unix_state_lock_nested(sk2); - } else { - unix_state_lock(sk2); - unix_state_lock_nested(sk1); - } + if (sk1 > sk2) + swap(sk1, sk2); + + unix_state_lock(sk1); + unix_state_lock_nested(sk2, U_LOCK_SECOND); } static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2) @@ -1591,7 +1589,7 @@ restart: goto out_unlock; } - unix_state_lock_nested(sk); + unix_state_lock_nested(sk, U_LOCK_SECOND); if (sk->sk_state != st) { unix_state_unlock(sk); diff --git a/net/unix/diag.c b/net/unix/diag.c index bec09a3a1d44..be19827eca36 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) * queue lock. With the other's queue locked it's * OK to lock the state. */ - unix_state_lock_nested(req); + unix_state_lock_nested(req, U_LOCK_DIAG); peer = unix_sk(req)->peer; buf[i++] = (peer ? sock_i_ino(peer) : 0); unix_state_unlock(req); diff --git a/scripts/Makefile.defconf b/scripts/Makefile.defconf index ab271b2051a2..226ea3df3b4b 100644 --- a/scripts/Makefile.defconf +++ b/scripts/Makefile.defconf @@ -9,8 +9,8 @@ # Input config fragments without '.config' suffix define merge_into_defconfig $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config) + -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ + $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef @@ -23,7 +23,7 @@ endef # Input config fragments without '.config' suffix define merge_into_defconfig_override $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh \ - -Q -m -O $(objtree) $(srctree)/arch/$(ARCH)/configs/$(1) \ - $(foreach config,$(2),$(srctree)/arch/$(ARCH)/configs/$(config).config) + -Q -m -O $(objtree) $(srctree)/arch/$(SRCARCH)/configs/$(1) \ + $(foreach config,$(2),$(srctree)/arch/$(SRCARCH)/configs/$(config).config) +$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig endef diff --git a/scripts/kconfig/symbol.c b/scripts/kconfig/symbol.c index 3e808528aaea..e9e9fb8d8674 100644 --- a/scripts/kconfig/symbol.c +++ b/scripts/kconfig/symbol.c @@ -345,6 +345,8 @@ void sym_calc_value(struct symbol *sym) oldval = sym->curr; + newval.tri = no; + switch (sym->type) { case S_INT: newval.val = "0"; @@ -357,7 +359,7 @@ void sym_calc_value(struct symbol *sym) break; case S_BOOLEAN: case S_TRISTATE: - newval = symbol_no.curr; + newval.val = "n"; break; default: sym->curr.val = sym->name; diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 795b21154446..267b9a0a3abc 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -70,9 +70,7 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...) break; case LOG_ERROR: fprintf(stderr, "ERROR: "); - break; - case LOG_FATAL: - fprintf(stderr, "FATAL: "); + error_occurred = true; break; default: /* invalid loglevel, ignore */ break; @@ -83,16 +81,8 @@ void modpost_log(enum loglevel loglevel, const char *fmt, ...) va_start(arglist, fmt); vfprintf(stderr, fmt, arglist); va_end(arglist); - - if (loglevel == LOG_FATAL) - exit(1); - if (loglevel == LOG_ERROR) - error_occurred = true; } -void __attribute__((alias("modpost_log"))) -modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); - static inline bool strends(const char *str, const char *postfix) { if (strlen(str) < strlen(postfix)) @@ -806,7 +796,8 @@ static void check_section(const char *modname, struct elf_info *elf, #define DATA_SECTIONS ".data", ".data.rel" #define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \ - ".kprobes.text", ".cpuidle.text", ".noinstr.text" + ".kprobes.text", ".cpuidle.text", ".noinstr.text", \ + ".ltext", ".ltext.*" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ ".fixup", ".entry.text", ".exception.text", \ ".coldtext", ".softirqentry.text" diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index 835cababf1b0..ee43c7950636 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -194,15 +194,11 @@ void *sym_get_data(const struct elf_info *info, const Elf_Sym *sym); enum loglevel { LOG_WARN, LOG_ERROR, - LOG_FATAL }; void __attribute__((format(printf, 2, 3))) modpost_log(enum loglevel loglevel, const char *fmt, ...); -void __attribute__((format(printf, 2, 3), noreturn)) -modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); - /* * warn - show the given message, then let modpost continue running, still * allowing modpost to exit successfully. This should be used when @@ -218,4 +214,4 @@ modpost_log_noret(enum loglevel loglevel, const char *fmt, ...); */ #define warn(fmt, args...) modpost_log(LOG_WARN, fmt, ##args) #define error(fmt, args...) modpost_log(LOG_ERROR, fmt, ##args) -#define fatal(fmt, args...) modpost_log_noret(LOG_FATAL, fmt, ##args) +#define fatal(fmt, args...) do { error(fmt, ##args); exit(1); } while (1) diff --git a/scripts/package/kernel.spec b/scripts/package/kernel.spec index 89298983a169..f58726671fb3 100644 --- a/scripts/package/kernel.spec +++ b/scripts/package/kernel.spec @@ -55,12 +55,12 @@ patch -p1 < %{SOURCE2} %{make} %{makeflags} KERNELRELEASE=%{KERNELRELEASE} KBUILD_BUILD_VERSION=%{release} %install -mkdir -p %{buildroot}/boot -cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/vmlinuz-%{KERNELRELEASE} +mkdir -p %{buildroot}/lib/modules/%{KERNELRELEASE} +cp $(%{make} %{makeflags} -s image_name) %{buildroot}/lib/modules/%{KERNELRELEASE}/vmlinuz %{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install %{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install -cp System.map %{buildroot}/boot/System.map-%{KERNELRELEASE} -cp .config %{buildroot}/boot/config-%{KERNELRELEASE} +cp System.map %{buildroot}/lib/modules/%{KERNELRELEASE} +cp .config %{buildroot}/lib/modules/%{KERNELRELEASE}/config ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/build %if %{with_devel} %{make} %{makeflags} run-command KBUILD_RUN_COMMAND='${srctree}/scripts/package/install-extmod-build %{buildroot}/usr/src/kernels/%{KERNELRELEASE}' @@ -70,13 +70,14 @@ ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEA rm -rf %{buildroot} %post -if [ -x /sbin/installkernel -a -r /boot/vmlinuz-%{KERNELRELEASE} -a -r /boot/System.map-%{KERNELRELEASE} ]; then -cp /boot/vmlinuz-%{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm -cp /boot/System.map-%{KERNELRELEASE} /boot/.System.map-%{KERNELRELEASE}-rpm -rm -f /boot/vmlinuz-%{KERNELRELEASE} /boot/System.map-%{KERNELRELEASE} -/sbin/installkernel %{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm -rm -f /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm +if [ -x /usr/bin/kernel-install ]; then + /usr/bin/kernel-install add %{KERNELRELEASE} /lib/modules/%{KERNELRELEASE}/vmlinuz fi +for file in vmlinuz System.map config; do + if ! cmp --silent "/lib/modules/%{KERNELRELEASE}/${file}" "/boot/${file}-%{KERNELRELEASE}"; then + cp "/lib/modules/%{KERNELRELEASE}/${file}" "/boot/${file}-%{KERNELRELEASE}" + fi +done %preun if [ -x /sbin/new-kernel-pkg ]; then @@ -94,7 +95,6 @@ fi %defattr (-, root, root) /lib/modules/%{KERNELRELEASE} %exclude /lib/modules/%{KERNELRELEASE}/build -/boot/* %files headers %defattr (-, root, root) diff --git a/security/security.c b/security/security.c index 0144a98d3712..3aaad75c9ce8 100644 --- a/security/security.c +++ b/security/security.c @@ -4255,7 +4255,19 @@ EXPORT_SYMBOL(security_inode_setsecctx); */ int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) { - return call_int_hook(inode_getsecctx, -EOPNOTSUPP, inode, ctx, ctxlen); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.inode_getsecctx, list) { + rc = hp->hook.inode_getsecctx(inode, ctx, ctxlen); + if (rc != LSM_RET_DEFAULT(inode_getsecctx)) + return rc; + } + + return LSM_RET_DEFAULT(inode_getsecctx); } EXPORT_SYMBOL(security_inode_getsecctx); @@ -4612,8 +4624,20 @@ EXPORT_SYMBOL(security_sock_rcv_skb); int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, sockptr_t optlen, unsigned int len) { - return call_int_hook(socket_getpeersec_stream, -ENOPROTOOPT, sock, - optval, optlen, len); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_stream, + list) { + rc = hp->hook.socket_getpeersec_stream(sock, optval, optlen, + len); + if (rc != LSM_RET_DEFAULT(socket_getpeersec_stream)) + return rc; + } + return LSM_RET_DEFAULT(socket_getpeersec_stream); } /** @@ -4633,8 +4657,19 @@ int security_socket_getpeersec_stream(struct socket *sock, sockptr_t optval, int security_socket_getpeersec_dgram(struct socket *sock, struct sk_buff *skb, u32 *secid) { - return call_int_hook(socket_getpeersec_dgram, -ENOPROTOOPT, sock, - skb, secid); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. + */ + hlist_for_each_entry(hp, &security_hook_heads.socket_getpeersec_dgram, + list) { + rc = hp->hook.socket_getpeersec_dgram(sock, skb, secid); + if (rc != LSM_RET_DEFAULT(socket_getpeersec_dgram)) + return rc; + } + return LSM_RET_DEFAULT(socket_getpeersec_dgram); } EXPORT_SYMBOL(security_socket_getpeersec_dgram); diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh index 2a268b17b61f..dbdd736a41d3 100644 --- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -48,6 +48,17 @@ test_LAG_cleanup() ip link add mv0 link "$name" up address "$ucaddr" type macvlan # Used to test dev->mc handling ip address add "$addr6" dev "$name" + + # Check that addresses were added as expected + (grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "macvlan unicast address not found on a slave" + + # mcaddr is added asynchronously by addrconf_dad_work(), use busywait + (busywait 10000 grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy1 || + grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy2) >/dev/null + check_err $? "IPv6 solicited-node multicast mac address not found on a slave" + ip link set dev "$name" down ip link del "$name" diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config index 265b6882cc21..b5e3a3aad4bf 100644 --- a/tools/testing/selftests/drivers/net/team/config +++ b/tools/testing/selftests/drivers/net/team/config @@ -1,3 +1,5 @@ +CONFIG_DUMMY=y +CONFIG_IPV6=y +CONFIG_MACVLAN=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y -CONFIG_MACVLAN=y diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index 352fc39f3c6c..b62c7dba6777 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -880,8 +880,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest does not overlap with other contacts. The value of `t` may be incremented over time to move the point along a linear path. """ - x = 50 + 10 * contact_id + t - y = 100 + 100 * contact_id + t + x = 50 + 10 * contact_id + t * 11 + y = 100 + 100 * contact_id + t * 11 return test_multitouch.Touch(contact_id, x, y) def make_contacts(self, n, t=0): @@ -902,8 +902,8 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest tracking_id = contact_ids.tracking_id slot_num = contact_ids.slot_num - x = 50 + 10 * contact_id + t - y = 100 + 100 * contact_id + t + x = 50 + 10 * contact_id + t * 11 + y = 100 + 100 * contact_id + t * 11 # If the data isn't supposed to be stored in any slots, there is # nothing we can check for in the evdev stream. diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index c8416c54b463..b1fd7362c2fe 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -42,17 +42,6 @@ function die() { exit 1 } -# save existing dmesg so we can detect new content -function save_dmesg() { - SAVED_DMESG=$(mktemp --tmpdir -t klp-dmesg-XXXXXX) - dmesg > "$SAVED_DMESG" -} - -# cleanup temporary dmesg file from save_dmesg() -function cleanup_dmesg_file() { - rm -f "$SAVED_DMESG" -} - function push_config() { DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \ awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}') @@ -99,7 +88,6 @@ function set_ftrace_enabled() { function cleanup() { pop_config - cleanup_dmesg_file } # setup_config - save the current config and set a script exit trap that @@ -280,7 +268,15 @@ function set_pre_patch_ret { function start_test { local test="$1" - save_dmesg + # Dump something unique into the dmesg log, then stash the entry + # in LAST_DMESG. The check_result() function will use it to + # find new kernel messages since the test started. + local last_dmesg_msg="livepatch kselftest timestamp: $(date --rfc-3339=ns)" + log "$last_dmesg_msg" + loop_until 'dmesg | grep -q "$last_dmesg_msg"' || + die "buffer busy? can't find canary dmesg message: $last_dmesg_msg" + LAST_DMESG=$(dmesg | grep "$last_dmesg_msg") + echo -n "TEST: $test ... " log "===== TEST: $test =====" } @@ -291,23 +287,24 @@ function check_result { local expect="$*" local result - # Note: when comparing dmesg output, the kernel log timestamps - # help differentiate repeated testing runs. Remove them with a - # post-comparison sed filter. - - result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \ + # Test results include any new dmesg entry since LAST_DMESG, then: + # - include lines matching keywords + # - exclude lines matching keywords + # - filter out dmesg timestamp prefixes + result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \ grep -e 'livepatch:' -e 'test_klp' | \ grep -v '\(tainting\|taints\) kernel' | \ sed 's/^\[[ 0-9.]*\] //') if [[ "$expect" == "$result" ]] ; then echo "ok" + elif [[ "$result" == "" ]] ; then + echo -e "not ok\n\nbuffer overrun? can't find canary dmesg entry: $LAST_DMESG\n" + die "livepatch kselftest(s) failed" else echo -e "not ok\n\n$(diff -upr --label expected --label result <(echo "$expect") <(echo "$result"))\n" die "livepatch kselftest(s) failed" fi - - cleanup_dmesg_file } # check_sysfs_rights(modname, rel_path, expected_rights) - check sysfs diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index 0899019a7fcb..e14bdd4455f2 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Kselftest framework requirement - SKIP code is 4. diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index 380b691d3eb9..b748c48908d9 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -566,7 +566,7 @@ static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot, if (map_ptr_orig == MAP_FAILED) err(2, "initial mmap"); - if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE)) + if (madvise(map_ptr, len, MADV_HUGEPAGE)) err(2, "MADV_HUGEPAGE"); pagemap_fd = open("/proc/self/pagemap", O_RDONLY); diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c index 193281560b61..86e8f2048a40 100644 --- a/tools/testing/selftests/mm/map_hugetlb.c +++ b/tools/testing/selftests/mm/map_hugetlb.c @@ -15,6 +15,7 @@ #include <unistd.h> #include <sys/mman.h> #include <fcntl.h> +#include "vm_util.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -58,10 +59,16 @@ int main(int argc, char **argv) { void *addr; int ret; + size_t hugepage_size; size_t length = LENGTH; int flags = FLAGS; int shift = 0; + hugepage_size = default_huge_page_size(); + /* munmap with fail if the length is not page aligned */ + if (hugepage_size > length) + length = hugepage_size; + if (argc > 1) length = atol(argv[1]) << 20; if (argc > 2) { diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 1d4c1589c305..2f8b991f78cb 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -360,7 +360,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, char pattern_seed) { void *addr, *src_addr, *dest_addr, *dest_preamble_addr; - unsigned long long i; + int d; + unsigned long long t; struct timespec t_start = {0, 0}, t_end = {0, 0}; long long start_ns, end_ns, align_mask, ret, offset; unsigned long long threshold; @@ -378,8 +379,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Set byte pattern for source block. */ srand(pattern_seed); - for (i = 0; i < threshold; i++) - memset((char *) src_addr + i, (char) rand(), 1); + for (t = 0; t < threshold; t++) + memset((char *) src_addr + t, (char) rand(), 1); /* Mask to zero out lower bits of address for alignment */ align_mask = ~(c.dest_alignment - 1); @@ -420,8 +421,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Set byte pattern for the dest preamble block. */ srand(pattern_seed); - for (i = 0; i < c.dest_preamble_size; i++) - memset((char *) dest_preamble_addr + i, (char) rand(), 1); + for (d = 0; d < c.dest_preamble_size; d++) + memset((char *) dest_preamble_addr + d, (char) rand(), 1); } clock_gettime(CLOCK_MONOTONIC, &t_start); @@ -437,14 +438,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Verify byte pattern after remapping */ srand(pattern_seed); - for (i = 0; i < threshold; i++) { + for (t = 0; t < threshold; t++) { char c = (char) rand(); - if (((char *) dest_addr)[i] != c) { + if (((char *) dest_addr)[t] != c) { ksft_print_msg("Data after remap doesn't match at offset %llu\n", - i); + t); ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, - ((char *) dest_addr)[i] & 0xff); + ((char *) dest_addr)[t] & 0xff); ret = -1; goto clean_up_dest; } @@ -453,14 +454,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Verify the dest preamble byte pattern after remapping */ if (c.dest_preamble_size) { srand(pattern_seed); - for (i = 0; i < c.dest_preamble_size; i++) { + for (d = 0; d < c.dest_preamble_size; d++) { char c = (char) rand(); - if (((char *) dest_preamble_addr)[i] != c) { + if (((char *) dest_preamble_addr)[d] != c) { ksft_print_msg("Preamble data after remap doesn't match at offset %d\n", - i); + d); ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, - ((char *) dest_preamble_addr)[i] & 0xff); + ((char *) dest_preamble_addr)[d] & 0xff); ret = -1; goto clean_up_dest; } diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 45cae7cab27e..a0a75f302904 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -29,9 +29,15 @@ check_supported_x86_64() # See man 1 gzip under '-f'. local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;} + else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) + if [[ "${pg_table_levels}" -lt 5 ]]; then echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" exit $ksft_skip + elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then + echo "$0: CPU does not have the necessary la57 flag to support page table level 5" + exit $ksft_skip fi } diff --git a/tools/testing/selftests/mm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh index 70a02301f4c2..3d2d2eb9d6ff 100755 --- a/tools/testing/selftests/mm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 set -e diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 50818075e566..211753756bde 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -53,8 +53,7 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh -TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh -TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh lib.sh +TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite @@ -84,6 +83,7 @@ TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello TEST_GEN_FILES += csum TEST_GEN_FILES += nat6to4.o +TEST_GEN_FILES += xdp_dummy.o TEST_GEN_FILES += ip_local_port_range TEST_GEN_FILES += bind_wildcard TEST_PROGS += test_vxlan_mdb.sh @@ -95,6 +95,7 @@ TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh TEST_FILES := settings +TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh include ../lib.mk @@ -104,7 +105,7 @@ $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread $(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/ -# Rules to generate bpf obj nat6to4.o +# Rules to generate bpf objs CLANG ?= clang SCRATCH_DIR := $(OUTPUT)/tools BUILD_DIR := $(SCRATCH_DIR)/build @@ -139,7 +140,7 @@ endif CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) -$(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS) +$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS) $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 19ff75051660..3b749addd364 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -19,8 +19,11 @@ CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_VLAN_8021Q=y +CONFIG_GENEVE=m CONFIG_IFB=y CONFIG_INET_DIAG=y +CONFIG_INET_ESP=y +CONFIG_INET_ESP_OFFLOAD=y CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y @@ -29,7 +32,10 @@ CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP6_NF_NAT=m +CONFIG_IP6_NF_RAW=m CONFIG_IP_NF_NAT=m +CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_TARGET_TTL=m CONFIG_IPV6_GRE=m CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_L2TP_ETH=m @@ -45,8 +51,14 @@ CONFIG_NF_TABLES=m CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NET_ACT_CSUM=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_BASIC=m +CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_U32=m CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m @@ -55,6 +67,9 @@ CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y +CONFIG_NET_SCH_PRIO=m +CONFIG_NFT_COMPAT=m +CONFIG_NF_FLOW_TABLE=m CONFIG_PSAMPLE=m CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m @@ -80,3 +95,4 @@ CONFIG_IP_SCTP=m CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m +CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 452693514be4..4de92632f483 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -112,7 +112,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh -TEST_PROGS_EXTENDED := devlink_lib.sh \ +TEST_FILES := devlink_lib.sh \ ethtool_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index dca549443801..f9fe182dfbd4 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -4,6 +4,9 @@ ############################################################################## # Defines +WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} +BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 # namespace list created by setup_ns @@ -48,7 +51,7 @@ cleanup_ns() for ns in "$@"; do ip netns delete "${ns}" &> /dev/null - if ! busywait 2 ip netns list \| grep -vq "^$ns$" &> /dev/null; then + if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" ret=1 fi diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index e317c2e44dae..4f80014cae49 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -22,8 +22,11 @@ CONFIG_NFT_TPROXY=m CONFIG_NFT_SOCKET=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IP6_NF_FILTER=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_ACT=y diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 3a5b63026191..c07386e21e0a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -643,13 +643,6 @@ kill_events_pids() mptcp_lib_kill_wait $evts_ns2_pid } -kill_tests_wait() -{ - #shellcheck disable=SC2046 - kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1) - wait -} - pm_nl_set_limits() { local ns=$1 @@ -3453,7 +3446,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm create destroy subflow @@ -3475,7 +3468,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm create id 0 subflow @@ -3494,7 +3487,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm remove initial subflow @@ -3518,7 +3511,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi # userspace pm send RM_ADDR for ID 0 @@ -3544,7 +3537,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - wait $tests_pid + mptcp_lib_kill_wait $tests_pid fi } @@ -3558,7 +3551,8 @@ endpoint_tests() pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal speed=slow \ - run_tests $ns1 $ns2 10.0.1.1 2>/dev/null & + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! wait_mpj $ns1 pm_nl_check_endpoint "creation" \ @@ -3573,7 +3567,7 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - kill_tests_wait + mptcp_lib_kill_wait $tests_pid fi if reset "delete and re-add" && @@ -3582,7 +3576,8 @@ endpoint_tests() pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow test_linkfail=4 speed=20 \ - run_tests $ns1 $ns2 10.0.1.1 2>/dev/null & + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! wait_mpj $ns2 chk_subflow_nr "before delete" 2 @@ -3597,7 +3592,7 @@ endpoint_tests() wait_mpj $ns2 chk_subflow_nr "after re-add" 2 chk_mptcp_info subflows 1 subflows 1 - kill_tests_wait + mptcp_lib_kill_wait $tests_pid fi } diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 022262a2cfe0..3a2abae5993e 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -6,7 +6,7 @@ readonly KSFT_FAIL=1 readonly KSFT_SKIP=4 # shellcheck disable=SC2155 # declare and assign separately -readonly KSFT_TEST=$(basename "${0}" | sed 's/\.sh$//g') +readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" MPTCP_LIB_SUBTESTS=() diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings index 79b65bdf05db..abc5648b59ab 100644 --- a/tools/testing/selftests/net/mptcp/settings +++ b/tools/testing/selftests/net/mptcp/settings @@ -1 +1 @@ -timeout=1200 +timeout=1800 diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index ae8ad5d6fb9d..0cc964e6f2c1 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -284,12 +284,12 @@ done setup run_test 10 10 0 0 "balanced bwidth" -run_test 10 10 1 50 "balanced bwidth with unbalanced delay" +run_test 10 10 1 25 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -run_test 30 10 0 0 "unbalanced bwidth" -run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" -run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +run_test 10 3 0 0 "unbalanced bwidth" +run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" +run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh index 4fe0befa13fb..4fe0befa13fb 100755..100644 --- a/tools/testing/selftests/net/net_helper.sh +++ b/tools/testing/selftests/net/net_helper.sh diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index f10879788f61..3f118e3f1c66 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -707,23 +707,23 @@ setup_xfrm6() { } setup_xfrm4udp() { - setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 4 4500 + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 } setup_xfrm6udp() { - setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 6 4500 + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 } setup_xfrm4udprouted() { - setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 4 4500 + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 4 4500 } setup_xfrm6udprouted() { - setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" - setup_nettest_xfrm 6 4500 + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \ + setup_nettest_xfrm 6 4500 } setup_routing_old() { @@ -1339,7 +1339,7 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { sleep 1 - dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 + dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 size=$(du -sb $tmpoutfile) size=${size%%/tmp/*} diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh index 2070b57849de..2070b57849de 100755..100644 --- a/tools/testing/selftests/net/setup_loopback.sh +++ b/tools/testing/selftests/net/setup_loopback.sh diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh index a9a1759e035c..1f78a87f6f37 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -11,7 +11,7 @@ setup_veth_ns() { local -r ns_mac="$4" [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" + echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 ip -netns "${ns_name}" link set dev "${ns_dev}" up diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config new file mode 100644 index 000000000000..d3277a9de987 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/config @@ -0,0 +1,10 @@ +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_RMD160=y +CONFIG_CRYPTO_SHA1=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=y +CONFIG_TCP_AO=y +CONFIG_TCP_MD5SIG=y +CONFIG_VETH=m diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index c48b4970ca17..24e62120b792 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -417,9 +417,9 @@ struct test_key { matches_vrf : 1, is_current : 1, is_rnext : 1, - used_on_handshake : 1, - used_after_accept : 1, - used_on_client : 1; + used_on_server_tx : 1, + used_on_client_tx : 1, + skip_counters_checks : 1; }; struct key_collection { @@ -609,16 +609,14 @@ static int key_collection_socket(bool server, unsigned int port) addr = &this_ip_dest; sndid = key->client_keyid; rcvid = key->server_keyid; - set_current = key->is_current; - set_rnext = key->is_rnext; + key->used_on_client_tx = set_current = key->is_current; + key->used_on_server_tx = set_rnext = key->is_rnext; } if (test_add_key_cr(sk, key->password, key->len, *addr, vrf, sndid, rcvid, key->maclen, key->alg, set_current, set_rnext)) test_key_error("setsockopt(TCP_AO_ADD_KEY)", key); - if (set_current || set_rnext) - key->used_on_handshake = 1; #ifdef DEBUG test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}", server ? "server" : "client", i, collection.nr_keys, @@ -640,22 +638,22 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server for (i = 0; i < collection.nr_keys; i++) { struct test_key *key = &collection.keys[i]; uint8_t sndid, rcvid; - bool was_used; + bool rx_cnt_expected; + if (key->skip_counters_checks) + continue; if (server) { sndid = key->server_keyid; rcvid = key->client_keyid; - if (is_listen_sk) - was_used = key->used_on_handshake; - else - was_used = key->used_after_accept; + rx_cnt_expected = key->used_on_client_tx; } else { sndid = key->client_keyid; rcvid = key->server_keyid; - was_used = key->used_on_client; + rx_cnt_expected = key->used_on_server_tx; } - test_tcp_ao_key_counters_cmp(tst_name, a, b, was_used, + test_tcp_ao_key_counters_cmp(tst_name, a, b, + rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, sndid, rcvid); } test_tcp_ao_counters_free(a); @@ -843,7 +841,7 @@ static void end_server(const char *tst_name, int sk, synchronize_threads(); /* 4: verified => closed */ close(sk); - verify_counters(tst_name, true, false, begin, &end); + verify_counters(tst_name, false, true, begin, &end); synchronize_threads(); /* 5: counters */ } @@ -916,9 +914,8 @@ static int run_client(const char *tst_name, unsigned int port, current_index = nr_keys - 1; if (rnext_index < 0) rnext_index = nr_keys - 1; - collection.keys[current_index].used_on_handshake = 1; - collection.keys[rnext_index].used_after_accept = 1; - collection.keys[rnext_index].used_on_client = 1; + collection.keys[current_index].used_on_client_tx = 1; + collection.keys[rnext_index].used_on_server_tx = 1; synchronize_threads(); /* 3: accepted => send data */ if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) { @@ -1059,7 +1056,16 @@ static void check_current_back(const char *tst_name, unsigned int port, test_error("Can't change the current key"); if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) test_fail("verify failed"); - collection.keys[rotate_to_index].used_after_accept = 1; + /* There is a race here: between setting the current_key with + * setsockopt(TCP_AO_INFO) and starting to send some data - there + * might have been a segment received with the desired + * RNext_key set. In turn that would mean that the first outgoing + * segment will have the desired current_key (flipped back). + * Which is what the user/test wants. As it's racy, skip checking + * the counters, yet check what are the resulting current/rnext + * keys on both sides. + */ + collection.keys[rotate_to_index].skip_counters_checks = 1; end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); } @@ -1089,7 +1095,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port, } verify_current_rnext(tst_name, sk, -1, collection.keys[i].server_keyid); - collection.keys[i].used_on_client = 1; + collection.keys[i].used_on_server_tx = 1; synchronize_threads(); /* verify current/rnext */ } end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c index c75d82885a2e..15aeb0963058 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/sock.c +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -62,7 +62,9 @@ int test_wait_fd(int sk, time_t sec, bool write) return -ETIMEDOUT; } - if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen) || ret) + if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen)) + return -errno; + if (ret) return -ret; return 0; } @@ -584,9 +586,11 @@ int test_client_verify(int sk, const size_t msg_len, const size_t nr, { size_t buf_sz = msg_len * nr; char *buf = alloca(buf_sz); + ssize_t ret; randomize_buffer(buf, buf_sz); - if (test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec) != buf_sz) - return -1; - return 0; + ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec); + if (ret < 0) + return (int)ret; + return ret != buf_sz ? -1 : 0; } diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c index ac06009a7f5f..7df8b8700e39 100644 --- a/tools/testing/selftests/net/tcp_ao/rst.c +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -1,10 +1,33 @@ // SPDX-License-Identifier: GPL-2.0 -/* Author: Dmitry Safonov <[email protected]> */ +/* + * The test checks that both active and passive reset have correct TCP-AO + * signature. An "active" reset (abort) here is procured from closing + * listen() socket with non-accepted connections in the queue: + * inet_csk_listen_stop() => inet_child_forget() => + * => tcp_disconnect() => tcp_send_active_reset() + * + * The passive reset is quite hard to get on established TCP connections. + * It could be procured from non-established states, but the synchronization + * part from userspace in order to reliably get RST seems uneasy. + * So, instead it's procured by corrupting SEQ number on TIMED-WAIT state. + * + * It's important to test both passive and active RST as they go through + * different code-paths: + * - tcp_send_active_reset() makes no-data skb, sends it with tcp_transmit_skb() + * - tcp_v*_send_reset() create their reply skbs and send them with + * ip_send_unicast_reply() + * + * In both cases TCP-AO signatures have to be correct, which is verified by + * (1) checking that the TCP-AO connection was reset and (2) TCP-AO counters. + * + * Author: Dmitry Safonov <[email protected]> + */ #include <inttypes.h> #include "../../../../include/linux/kernel.h" #include "aolib.h" const size_t quota = 1000; +const size_t packet_sz = 100; /* * Backlog == 0 means 1 connection in queue, see: * commit 64a146513f8f ("[NET]: Revert incorrect accept queue...") @@ -59,26 +82,6 @@ static void close_forced(int sk) close(sk); } -static int test_wait_for_exception(int sk, time_t sec) -{ - struct timeval tv = { .tv_sec = sec }; - struct timeval *ptv = NULL; - fd_set efds; - int ret; - - FD_ZERO(&efds); - FD_SET(sk, &efds); - - if (sec) - ptv = &tv; - - errno = 0; - ret = select(sk + 1, NULL, NULL, &efds, ptv); - if (ret < 0) - return -errno; - return ret ? sk : 0; -} - static void test_server_active_rst(unsigned int port) { struct tcp_ao_counters cnt1, cnt2; @@ -155,17 +158,16 @@ static void test_server_passive_rst(unsigned int port) test_fail("server returned %zd", bytes); } - synchronize_threads(); /* 3: chekpoint/restore the connection */ + synchronize_threads(); /* 3: checkpoint the client */ + synchronize_threads(); /* 4: close the server, creating twsk */ if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); - - synchronize_threads(); /* 4: terminate server + send more on client */ - bytes = test_server_run(sk, quota, TEST_RETRANSMIT_SEC); close(sk); + + synchronize_threads(); /* 5: restore the socket, send more data */ test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD); - synchronize_threads(); /* 5: verified => closed */ - close(sk); + synchronize_threads(); /* 6: server exits */ } static void *server_fn(void *arg) @@ -284,7 +286,7 @@ static void test_client_active_rst(unsigned int port) test_error("test_wait_fds(): %d", err); synchronize_threads(); /* 3: close listen socket */ - if (test_client_verify(sk[0], 100, quota / 100, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); @@ -323,7 +325,6 @@ static void test_client_passive_rst(unsigned int port) struct tcp_sock_state img; sockaddr_af saddr; int sk, err; - socklen_t slen = sizeof(err); sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); if (sk < 0) @@ -337,18 +338,51 @@ static void test_client_passive_rst(unsigned int port) test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, 100, quota / 100, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); - synchronize_threads(); /* 3: chekpoint/restore the connection */ + synchronize_threads(); /* 3: checkpoint the client */ test_enable_repair(sk); test_sock_checkpoint(sk, &img, &saddr); test_ao_checkpoint(sk, &ao_img); - test_kill_sk(sk); + test_disable_repair(sk); - img.out.seq += quota; + synchronize_threads(); /* 4: close the server, creating twsk */ + + /* + * The "corruption" in SEQ has to be small enough to fit into TCP + * window, see tcp_timewait_state_process() for out-of-window + * segments. + */ + img.out.seq += 5; /* 5 is more noticeable in tcpdump than 1 */ + + /* + * FIXME: This is kind-of ugly and dirty, but it works. + * + * At this moment, the server has close'ed(sk). + * The passive RST that is being targeted here is new data after + * half-duplex close, see tcp_timewait_state_process() => TCP_TW_RST + * + * What is needed here is: + * (1) wait for FIN from the server + * (2) make sure that the ACK from the client went out + * (3) make sure that the ACK was received and processed by the server + * + * Otherwise, the data that will be sent from "repaired" socket + * post SEQ corruption may get to the server before it's in + * TCP_FIN_WAIT2. + * + * (1) is easy with select()/poll() + * (2) is possible by polling tcpi_state from TCP_INFO + * (3) is quite complex: as server's socket was already closed, + * probably the way to do it would be tcp-diag. + */ + sleep(TEST_RETRANSMIT_SEC); + + synchronize_threads(); /* 5: restore the socket, send more data */ + test_kill_sk(sk); sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); if (sk < 0) @@ -366,25 +400,33 @@ static void test_client_passive_rst(unsigned int port) test_disable_repair(sk); test_sock_state_free(&img); - synchronize_threads(); /* 4: terminate server + send more on client */ - if (test_client_verify(sk, 100, quota / 100, 2 * TEST_TIMEOUT_SEC)) - test_ok("client connection broken post-seq-adjust"); - else - test_fail("client connection still works post-seq-adjust"); - - test_wait_for_exception(sk, TEST_TIMEOUT_SEC); - - if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &err, &slen)) - test_error("getsockopt()"); - if (err != ECONNRESET && err != EPIPE) - test_fail("client connection was not reset: %d", err); + /* + * This is how "passive reset" is acquired in this test from TCP_TW_RST: + * + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [P.], seq 901:1001, ack 1001, win 249, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x10217d6c36a22379086ef3b1], length 100 + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [F.], seq 1001, ack 1001, win 249, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x104ffc99b98c10a5298cc268], length 0 + * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [.], ack 1002, win 251, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0xe496dd4f7f5a8a66873c6f93,nop,nop,sack 1 {1001:1002}], length 0 + * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [P.], seq 1006:1106, ack 1001, win 251, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x1b5f3330fb23fbcd0c77d0ca], length 100 + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0 + */ + err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC); + /* Make sure that the connection was reset, not timeouted */ + if (err && err == -ECONNRESET) + test_ok("client sock was passively reset post-seq-adjust"); + else if (err) + test_fail("client sock was not reset post-seq-adjust: %d", err); else - test_ok("client connection was reset"); + test_fail("client sock is yet connected post-seq-adjust"); if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); - synchronize_threads(); /* 5: verified => closed */ + synchronize_threads(); /* 6: server exits */ close(sk); test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD); } @@ -410,6 +452,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(15, server_fn, client_fn); + test_init(14, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/settings b/tools/testing/selftests/net/tcp_ao/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/settings @@ -0,0 +1 @@ +timeout=120 diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index af5dc57c8ce9..8802604148dd 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" # set global exit status, but never reset nonzero one. check_err() @@ -197,7 +197,7 @@ run_all() { } if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index cb664679b434..7080eae5312b 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" cleanup() { local -r jobs="$(jobs -p)" @@ -84,7 +84,7 @@ run_all() { } if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index dd47fa96f6b3..e1ff645bd3d1 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" cleanup() { local -r jobs="$(jobs -p)" @@ -85,12 +85,12 @@ run_all() { } if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit -1 fi if [ ! -f nat6to4.o ]; then - echo "Missing nat6to4 helper. Build bpf nat6to4.o selftest first" + echo "Missing nat6to4 helper. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index c079565add39..d6b9c759043c 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,7 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="../bpf/xdp_dummy.bpf.o" +source net_helper.sh + +BPF_FILE="xdp_dummy.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 @@ -119,7 +121,7 @@ run_test() { ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000 ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args & local spid=$! - sleep 0.1 + wait_local_port_listen "$NS_DST" 8000 udp ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst local retc=$? wait $spid @@ -168,7 +170,7 @@ run_bench() { ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus" ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 & local spid=$! - sleep 0.1 + wait_local_port_listen "$NS_DST" 8000 udp ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst local retc=$? wait $spid diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 2d073595c620..27574bbf2d63 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="../bpf/xdp_dummy.bpf.o" +BPF_FILE="xdp_dummy.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 @@ -218,7 +218,7 @@ while getopts "hs:" option; do done if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Build bpf selftest first" + echo "Missing ${BPF_FILE}. Run 'make' first" exit 1 fi diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.c new file mode 100644 index 000000000000..d988b2e0cee8 --- /dev/null +++ b/tools/testing/selftests/net/xdp_dummy.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("xdp") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c index 887542961968..2348d2c20d0a 100644 --- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c @@ -24,6 +24,11 @@ bool rseq_validate_cpu_id(void) { return rseq_mm_cid_available(); } +static +bool rseq_use_cpu_index(void) +{ + return false; /* Use mm_cid */ +} #else # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID static @@ -36,6 +41,11 @@ bool rseq_validate_cpu_id(void) { return rseq_current_cpu_raw() >= 0; } +static +bool rseq_use_cpu_index(void) +{ + return true; /* Use cpu_id as index. */ +} #endif struct percpu_lock_entry { @@ -274,7 +284,7 @@ void test_percpu_list(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; for (j = 1; j <= 100; j++) { struct percpu_list_node *node; @@ -299,7 +309,7 @@ void test_percpu_list(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_list_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_list_pop(&list, i))) { diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 20403d58345c..2f37961240ca 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -288,6 +288,11 @@ bool rseq_validate_cpu_id(void) { return rseq_mm_cid_available(); } +static +bool rseq_use_cpu_index(void) +{ + return false; /* Use mm_cid */ +} # ifdef TEST_MEMBARRIER /* * Membarrier does not currently support targeting a mm_cid, so @@ -312,6 +317,11 @@ bool rseq_validate_cpu_id(void) { return rseq_current_cpu_raw() >= 0; } +static +bool rseq_use_cpu_index(void) +{ + return true; /* Use cpu_id as index. */ +} # ifdef TEST_MEMBARRIER static int rseq_membarrier_expedited(int cpu) @@ -715,7 +725,7 @@ void test_percpu_list(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; for (j = 1; j <= 100; j++) { struct percpu_list_node *node; @@ -752,7 +762,7 @@ void test_percpu_list(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_list_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_list_pop(&list, i))) { @@ -902,7 +912,7 @@ void test_percpu_buffer(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; /* Worse-case is every item in same CPU. */ buffer.c[i].array = @@ -952,7 +962,7 @@ void test_percpu_buffer(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_buffer_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_buffer_pop(&buffer, i))) { @@ -1113,7 +1123,7 @@ void test_percpu_memcpy_buffer(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; /* Worse-case is every item in same CPU. */ buffer.c[i].array = @@ -1160,7 +1170,7 @@ void test_percpu_memcpy_buffer(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_memcpy_buffer_node item; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 5b5c9d558dee..97b86980b768 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -38,10 +38,10 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples) i *= 1000000000ULL; i += finish.tv_nsec - start.tv_nsec; - printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", - finish.tv_sec, finish.tv_nsec, - start.tv_sec, start.tv_nsec, - i, (double)i / 1000000000.0); + ksft_print_msg("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", + finish.tv_sec, finish.tv_nsec, + start.tv_sec, start.tv_nsec, + i, (double)i / 1000000000.0); return i; } @@ -53,7 +53,7 @@ unsigned long long calibrate(void) pid_t pid, ret; int seconds = 15; - printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); + ksft_print_msg("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); samples = 0; pid = getpid(); @@ -98,24 +98,36 @@ bool le(int i_one, int i_two) } long compare(const char *name_one, const char *name_eval, const char *name_two, - unsigned long long one, bool (*eval)(int, int), unsigned long long two) + unsigned long long one, bool (*eval)(int, int), unsigned long long two, + bool skip) { bool good; - printf("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, - (long long)one, name_eval, (long long)two); + if (skip) { + ksft_test_result_skip("%s %s %s\n", name_one, name_eval, + name_two); + return 0; + } + + ksft_print_msg("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, + (long long)one, name_eval, (long long)two); if (one > INT_MAX) { - printf("Miscalculation! Measurement went negative: %lld\n", (long long)one); - return 1; + ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)one); + good = false; + goto out; } if (two > INT_MAX) { - printf("Miscalculation! Measurement went negative: %lld\n", (long long)two); - return 1; + ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)two); + good = false; + goto out; } good = eval(one, two); printf("%s\n", good ? "✔️" : "❌"); +out: + ksft_test_result(good, "%s %s %s\n", name_one, name_eval, name_two); + return good ? 0 : 1; } @@ -142,15 +154,22 @@ int main(int argc, char *argv[]) unsigned long long samples, calc; unsigned long long native, filter1, filter2, bitmap1, bitmap2; unsigned long long entry, per_filter1, per_filter2; + bool skip = false; setbuf(stdout, NULL); - printf("Running on:\n"); + ksft_print_header(); + ksft_set_plan(7); + + ksft_print_msg("Running on:\n"); + ksft_print_msg(""); system("uname -a"); - printf("Current BPF sysctl settings:\n"); + ksft_print_msg("Current BPF sysctl settings:\n"); /* Avoid using "sysctl" which may not be installed. */ + ksft_print_msg(""); system("grep -H . /proc/sys/net/core/bpf_jit_enable"); + ksft_print_msg(""); system("grep -H . /proc/sys/net/core/bpf_jit_harden"); if (argc > 1) @@ -158,11 +177,11 @@ int main(int argc, char *argv[]) else samples = calibrate(); - printf("Benchmarking %llu syscalls...\n", samples); + ksft_print_msg("Benchmarking %llu syscalls...\n", samples); /* Native call */ native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid native: %llu ns\n", native); + ksft_print_msg("getpid native: %llu ns\n", native); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); assert(ret == 0); @@ -172,35 +191,37 @@ int main(int argc, char *argv[]) assert(ret == 0); bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); + ksft_print_msg("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); /* Second filter resulting in a bitmap */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); assert(ret == 0); bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); + ksft_print_msg("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); /* Third filter, can no longer be converted to bitmap */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); assert(ret == 0); filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); + ksft_print_msg("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); /* Fourth filter, can not be converted to bitmap because of filter 3 */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); assert(ret == 0); filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); + ksft_print_msg("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); /* Estimations */ #define ESTIMATE(fmt, var, what) do { \ var = (what); \ - printf("Estimated " fmt ": %llu ns\n", var); \ - if (var > INT_MAX) \ - goto more_samples; \ + ksft_print_msg("Estimated " fmt ": %llu ns\n", var); \ + if (var > INT_MAX) { \ + skip = true; \ + ret |= 1; \ + } \ } while (0) ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc, @@ -218,31 +239,34 @@ int main(int argc, char *argv[]) ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2, (filter2 - native - entry) / 4); - printf("Expectations:\n"); - ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1); - bits = compare("native", "≤", "1 filter", native, le, filter1); + ksft_print_msg("Expectations:\n"); + ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1, + skip); + bits = compare("native", "≤", "1 filter", native, le, filter1, + skip); if (bits) - goto more_samples; + skip = true; ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)", - per_filter1, approx, per_filter2); + per_filter1, approx, per_filter2, skip); bits = compare("1 bitmapped", "≈", "2 bitmapped", - bitmap1 - native, approx, bitmap2 - native); + bitmap1 - native, approx, bitmap2 - native, skip); if (bits) { - printf("Skipping constant action bitmap expectations: they appear unsupported.\n"); - goto out; + ksft_print_msg("Skipping constant action bitmap expectations: they appear unsupported.\n"); + skip = true; } - ret |= compare("entry", "≈", "1 bitmapped", entry, approx, bitmap1 - native); - ret |= compare("entry", "≈", "2 bitmapped", entry, approx, bitmap2 - native); + ret |= compare("entry", "≈", "1 bitmapped", entry, approx, + bitmap1 - native, skip); + ret |= compare("entry", "≈", "2 bitmapped", entry, approx, + bitmap2 - native, skip); ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total", - entry + (per_filter1 * 4) + native, approx, filter2); - if (ret == 0) - goto out; + entry + (per_filter1 * 4) + native, approx, filter2, + skip); -more_samples: - printf("Saw unexpected benchmark result. Try running again with more samples?\n"); -out: - return 0; + if (ret) + ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n"); + + ksft_finished(); } |