From eec4954b81c3d9a38b99e78afb553c359db40093 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 28 Nov 2023 10:28:15 +0000 Subject: driver core: make device_is_dependent() static The function device_is_dependent() is only called by the driver core internally and should not, at this time, be called by anyone else outside of it, so mark it as static so as not to give driver authors the wrong idea. Cc: Saravana Kannan Acked-by: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/2023112815-faculty-thud-add8@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 +- include/linux/device.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 67ba592afc77..e57467194104 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -298,7 +298,7 @@ static inline bool device_link_flag_is_sync_state_only(u32 flags) * Check if @target depends on @dev or any device dependent on it (its child or * its consumer etc). Return 1 if that is the case or 0 otherwise. */ -int device_is_dependent(struct device *dev, void *target) +static int device_is_dependent(struct device *dev, void *target) { struct device_link *link; int ret; diff --git a/include/linux/device.h b/include/linux/device.h index d7a72a8749ea..4aa34c8d1361 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1071,7 +1071,6 @@ int device_rename(struct device *dev, const char *new_name); int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order); int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid); -int device_is_dependent(struct device *dev, void *target); static inline bool device_supports_offline(struct device *dev) { -- cgit From c72bbf200162a3b4b9e1baedec9008d8d710b427 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:43:54 +0000 Subject: arch_topology: Make register_cpu_capacity_sysctl() tolerant to late CPUs register_cpu_capacity_sysctl() adds a property to sysfs that describes the CPUs capacity. This is done from a subsys_initcall() that assumes all possible CPUs are registered. With CPU hotplug, possible CPUs aren't registered until they become present, (or for arm64 enabled). This leads to messages during boot: | register_cpu_capacity_sysctl: too early to get CPU1 device! and once these CPUs are added to the system, the file is missing. Move this to a cpuhp callback, so that the file is created once CPUs are brought online. This covers CPUs that are added late by mechanisms like hotplug. One observable difference is the file is now missing for offline CPUs. Signed-off-by: James Morse Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R2g-00CsyV-Ss@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- drivers/base/arch_topology.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index b741b5ba82bd..9ccb7daee78e 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -220,20 +220,34 @@ static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn); static DEVICE_ATTR_RO(cpu_capacity); -static int register_cpu_capacity_sysctl(void) +static int cpu_capacity_sysctl_add(unsigned int cpu) { - int i; - struct device *cpu; + struct device *cpu_dev = get_cpu_device(cpu); - for_each_possible_cpu(i) { - cpu = get_cpu_device(i); - if (!cpu) { - pr_err("%s: too early to get CPU%d device!\n", - __func__, i); - continue; - } - device_create_file(cpu, &dev_attr_cpu_capacity); - } + if (!cpu_dev) + return -ENOENT; + + device_create_file(cpu_dev, &dev_attr_cpu_capacity); + + return 0; +} + +static int cpu_capacity_sysctl_remove(unsigned int cpu) +{ + struct device *cpu_dev = get_cpu_device(cpu); + + if (!cpu_dev) + return -ENOENT; + + device_remove_file(cpu_dev, &dev_attr_cpu_capacity); + + return 0; +} + +static int register_cpu_capacity_sysctl(void) +{ + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "topology/cpu-capacity", + cpu_capacity_sysctl_add, cpu_capacity_sysctl_remove); return 0; } -- cgit From d87c49377d5bbf3f5e5729c67f3892e1d2e6f661 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:44:00 +0000 Subject: x86: intel_epb: Don't rely on link order intel_epb_init() is called as a subsys_initcall() to register cpuhp callbacks. The callbacks make use of get_cpu_device() which will return NULL unless register_cpu() has been called. register_cpu() is called from topology_init(), which is also a subsys_initcall(). This is fragile. Moving the register_cpu() to a different subsys_initcall() leads to a NULL dereference during boot. Make intel_epb_init() a late_initcall(), user-space can't provide a policy before this point anyway. Signed-off-by: James Morse Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Acked-by: "Rafael J. Wysocki" Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R2m-00Csyb-2S@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/intel_epb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c index e4c3ba91321c..f18d35fe27a9 100644 --- a/arch/x86/kernel/cpu/intel_epb.c +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -237,4 +237,4 @@ err_out_online: cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE); return ret; } -subsys_initcall(intel_epb_init); +late_initcall(intel_epb_init); -- cgit From 9aa9b4fcc311ede18adfe55e721115f7c59e60be Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 21 Nov 2023 13:44:05 +0000 Subject: x86/topology: remove arch_*register_cpu() exports arch_register_cpu() and arch_unregister_cpu() are not used by anything that can be a module - they are used by drivers/base/cpu.c and drivers/acpi/acpi_processor.c, neither of which can be a module. Remove the exports. Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R2r-00Csyh-7B@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/topology.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 0bab03130033..fcb62cfdf946 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -45,13 +45,11 @@ int arch_register_cpu(int cpu) xc->cpu.hotpluggable = cpu > 0; return register_cpu(&xc->cpu, cpu); } -EXPORT_SYMBOL(arch_register_cpu); void arch_unregister_cpu(int num) { unregister_cpu(&per_cpu(cpu_devices, num).cpu); } -EXPORT_SYMBOL(arch_unregister_cpu); #else /* CONFIG_HOTPLUG_CPU */ int __init arch_register_cpu(int num) -- cgit From 29d93102fd1e19024fce90995040fb3a46fd91c1 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 21 Nov 2023 13:44:10 +0000 Subject: Loongarch: remove arch_*register_cpu() exports arch_register_cpu() and arch_unregister_cpu() are not used by anything that can be a module - they are used by drivers/base/cpu.c and drivers/acpi/acpi_processor.c, neither of which can be a module. Remove the exports. Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R2w-00Csyn-E2@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/topology.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c index 3fd166006698..ae860fe81536 100644 --- a/arch/loongarch/kernel/topology.c +++ b/arch/loongarch/kernel/topology.c @@ -25,7 +25,6 @@ int arch_register_cpu(int cpu) return ret; } -EXPORT_SYMBOL(arch_register_cpu); void arch_unregister_cpu(int cpu) { @@ -34,7 +33,6 @@ void arch_unregister_cpu(int cpu) c->hotpluggable = 0; unregister_cpu(c); } -EXPORT_SYMBOL(arch_unregister_cpu); #endif static int __init topology_init(void) -- cgit From a02f66bb3cf475947b58dd3851b987b8ccd998c1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:44:15 +0000 Subject: ACPI: Move ACPI_HOTPLUG_CPU to be disabled on arm64 and riscv Neither arm64 nor riscv support physical hotadd of CPUs that were not present at boot. For arm64 much of the platform description is in static tables which do not have update methods. arm64 does support HOTPLUG_CPU, which is backed by a firmware interface to turn CPUs on and off. acpi_processor_hotadd_init() and acpi_processor_remove() are for adding and removing CPUs that were not present at boot. arm64 systems that do this are not supported as there is currently insufficient information in the platform description. (e.g. did the GICR get removed too?) arm64 currently relies on the MADT enabled flag check in map_gicc_mpidr() to prevent CPUs that were not described as present at boot from being added to the system. Similarly, riscv relies on the same check in map_rintc_hartid(). Both architectures also rely on the weak 'always fails' definitions of acpi_map_cpu() and arch_register_cpu(). Subsequent changes will redefine ACPI_HOTPLUG_CPU as making possible CPUs present. Neither arm64 nor riscv support this. Disable ACPI_HOTPLUG_CPU for arm64 and riscv by removing 'default y' and selecting it on the other three ACPI architectures. This allows the weak definitions of some symbols to be removed. Signed-off-by: James Morse Reviewed-by: Shaoqin Huang Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R31-00Csyt-Jq@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/Kconfig | 1 + arch/x86/Kconfig | 1 + drivers/acpi/Kconfig | 1 - drivers/acpi/acpi_processor.c | 18 ------------------ 4 files changed, 2 insertions(+), 19 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index ee123820a476..331becb2cb4f 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -5,6 +5,7 @@ config LOONGARCH select ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_MCFG if ACPI + select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU select ACPI_PPTT if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_BINFMT_ELF_STATE diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3762f41bb092..dbdcfc708369 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -59,6 +59,7 @@ config X86 # select ACPI_LEGACY_TABLES_LOOKUP if ACPI select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI + select ACPI_HOTPLUG_CPU if ACPI_PROCESSOR && HOTPLUG_CPU select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index f819e760ff19..a3acfc750fce 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -310,7 +310,6 @@ config ACPI_HOTPLUG_CPU bool depends on ACPI_PROCESSOR && HOTPLUG_CPU select ACPI_CONTAINER - default y config ACPI_PROCESSOR_AGGREGATOR tristate "Processor Aggregator" diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 0f5218e361df..4fe2ef54088c 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -184,24 +184,6 @@ static void __init acpi_pcc_cpufreq_init(void) {} /* Initialization */ #ifdef CONFIG_ACPI_HOTPLUG_CPU -int __weak acpi_map_cpu(acpi_handle handle, - phys_cpuid_t physid, u32 acpi_id, int *pcpu) -{ - return -ENODEV; -} - -int __weak acpi_unmap_cpu(int cpu) -{ - return -ENODEV; -} - -int __weak arch_register_cpu(int cpu) -{ - return -ENODEV; -} - -void __weak arch_unregister_cpu(int cpu) {} - static int acpi_processor_hotadd_init(struct acpi_processor *pr) { unsigned long long sta; -- cgit From b0c69e1214bc20960c2ca68317b968e2a2057ed5 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:44:20 +0000 Subject: drivers: base: Use present CPUs in GENERIC_CPU_DEVICES Three of the five ACPI architectures create sysfs entries using register_cpu() for present CPUs, whereas arm64, riscv and all GENERIC_CPU_DEVICES do this for possible CPUs. Registering a CPU is what causes them to show up in sysfs. It makes very little sense to register all possible CPUs. Registering a CPU is what triggers the udev notifications allowing user-space to react to newly added CPUs. To allow all five ACPI architectures to use GENERIC_CPU_DEVICES, change it to use for_each_present_cpu(). Making the ACPI architectures use GENERIC_CPU_DEVICES is a pre-requisite step to centralise their register_cpu() logic, before moving it into the ACPI processor driver. When we add support for register CPUs from ACPI in a later patch, we will avoid registering CPUs in this path. Of the ACPI architectures that register possible CPUs, arm64 and riscv do not support making possible CPUs present as they use the weak 'always fails' version of arch_register_cpu(). Only two of the eight architectures that use GENERIC_CPU_DEVICES have a distinction between present and possible CPUs. The following architectures use GENERIC_CPU_DEVICES but are not SMP, so possible == present: * m68k * microblaze * nios2 The following architectures use GENERIC_CPU_DEVICES and consider possible == present: * csky: setup_smp() * processor_probe() sets possible for all CPUs and present for all CPUs except the boot cpu, which will have been done by init/main.c::start_kernel(). um appears to be a subarchitecture of x86. The remaining architecture using GENERIC_CPU_DEVICES are: * openrisc and hexagon: where smp_init_cpus() makes all CPUs < NR_CPUS possible, whereas smp_prepare_cpus() only makes CPUs < setup_max_cpus present. After this change, openrisc and hexagon systems that use the max_cpus command line argument would not see the other CPUs present in sysfs. This should not be a problem as these CPUs can't be brought online as _cpu_up() checks cpu_present(). After this change, only CPUs which are present appear in sysfs. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R36-00Csz0-Px@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- drivers/base/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 9ea22e165acd..34b48f660b6b 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -533,7 +533,7 @@ static void __init cpu_dev_register_generic(void) #ifdef CONFIG_GENERIC_CPU_DEVICES int i; - for_each_possible_cpu(i) { + for_each_present_cpu(i) { if (register_cpu(&per_cpu(cpu_devices, i), i)) panic("Failed to register CPU device"); } -- cgit From 0949dd96dffec39683c6066cf8d0877cebc321ec Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:44:25 +0000 Subject: drivers: base: Allow parts of GENERIC_CPU_DEVICES to be overridden Architectures often have extra per-cpu work that needs doing before a CPU is registered, often to determine if a CPU is hotpluggable. To allow the ACPI architectures to use GENERIC_CPU_DEVICES, move the cpu_register() call into arch_register_cpu(), which is made __weak so architectures with extra work can override it. This aligns with the way x86, ia64 and loongarch register hotplug CPUs when they become present. Signed-off-by: James Morse Reviewed-by: Shaoqin Huang Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R3B-00Csz6-Uh@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- drivers/base/cpu.c | 14 ++++++++++---- include/linux/cpu.h | 4 ++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 34b48f660b6b..579064fda97b 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -525,19 +525,25 @@ bool cpu_is_hotpluggable(unsigned int cpu) EXPORT_SYMBOL_GPL(cpu_is_hotpluggable); #ifdef CONFIG_GENERIC_CPU_DEVICES -static DEFINE_PER_CPU(struct cpu, cpu_devices); +DEFINE_PER_CPU(struct cpu, cpu_devices); + +int __weak arch_register_cpu(int cpu) +{ + return register_cpu(&per_cpu(cpu_devices, cpu), cpu); +} #endif static void __init cpu_dev_register_generic(void) { -#ifdef CONFIG_GENERIC_CPU_DEVICES int i; + if (!IS_ENABLED(CONFIG_GENERIC_CPU_DEVICES)) + return; + for_each_present_cpu(i) { - if (register_cpu(&per_cpu(cpu_devices, i), i)) + if (arch_register_cpu(i)) panic("Failed to register CPU device"); } -#endif } #ifdef CONFIG_GENERIC_CPU_VULNERABILITIES diff --git a/include/linux/cpu.h b/include/linux/cpu.h index fc8094419084..1e982d63eae8 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -88,6 +88,10 @@ extern ssize_t arch_cpu_probe(const char *, size_t); extern ssize_t arch_cpu_release(const char *, size_t); #endif +#ifdef CONFIG_GENERIC_CPU_DEVICES +DECLARE_PER_CPU(struct cpu, cpu_devices); +#endif + /* * These states are not related to the core CPU hotplug mechanism. They are * used by various (sub)architectures to track internal state -- cgit From 866ec3008691ff205b171413c52c5f1f328a2f8b Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:44:31 +0000 Subject: drivers: base: Implement weak arch_unregister_cpu() Add arch_unregister_cpu() to allow the ACPI machinery to call unregister_cpu(). This is enough for arm64, riscv and loongarch, but needs to be overridden by x86 and ia64 who need to do more work. CC: Jean-Philippe Brucker Signed-off-by: James Morse Reviewed-by: Shaoqin Huang Signed-off-by: "Russell King (Oracle)" Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R3H-00CszC-2n@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- drivers/base/cpu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 579064fda97b..58bb86091b34 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -531,7 +531,14 @@ int __weak arch_register_cpu(int cpu) { return register_cpu(&per_cpu(cpu_devices, cpu), cpu); } -#endif + +#ifdef CONFIG_HOTPLUG_CPU +void __weak arch_unregister_cpu(int num) +{ + unregister_cpu(&per_cpu(cpu_devices, num)); +} +#endif /* CONFIG_HOTPLUG_CPU */ +#endif /* CONFIG_GENERIC_CPU_DEVICES */ static void __init cpu_dev_register_generic(void) { -- cgit From bb5e44fb3be685ecb3feb120aca4269a92cc84cf Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 21 Nov 2023 13:44:36 +0000 Subject: drivers: base: add arch_cpu_is_hotpluggable() The differences between architecture specific implementations of arch_register_cpu() are down to whether the CPU is hotpluggable or not. Rather than overriding the weak version of arch_register_cpu(), provide a function that can be used to provide this detail instead. Reviewed-by: Shaoqin Huang Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R3M-00CszH-6r@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- drivers/base/cpu.c | 11 ++++++++++- include/linux/cpu.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 58bb86091b34..221ffbeb1c9b 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -527,9 +527,18 @@ EXPORT_SYMBOL_GPL(cpu_is_hotpluggable); #ifdef CONFIG_GENERIC_CPU_DEVICES DEFINE_PER_CPU(struct cpu, cpu_devices); +bool __weak arch_cpu_is_hotpluggable(int cpu) +{ + return false; +} + int __weak arch_register_cpu(int cpu) { - return register_cpu(&per_cpu(cpu_devices, cpu), cpu); + struct cpu *c = &per_cpu(cpu_devices, cpu); + + c->hotpluggable = arch_cpu_is_hotpluggable(cpu); + + return register_cpu(c, cpu); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 1e982d63eae8..dcb89c987164 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -80,6 +80,7 @@ extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, const char *fmt, ...); +extern bool arch_cpu_is_hotpluggable(int cpu); extern int arch_register_cpu(int cpu); extern void arch_unregister_cpu(int cpu); #ifdef CONFIG_HOTPLUG_CPU -- cgit From d631a881f1ab0091de35ae09ba48193a543666b5 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:44:41 +0000 Subject: drivers: base: Move cpu_dev_init() after node_dev_init() NUMA systems require the node descriptions to be ready before CPUs are registered. This is so that the node symlinks can be created in sysfs. Currently no NUMA platform uses GENERIC_CPU_DEVICES, meaning that CPUs are registered by arch code, instead of cpu_dev_init(). Move cpu_dev_init() after node_dev_init() so that NUMA architectures can use GENERIC_CPU_DEVICES. Signed-off-by: James Morse Signed-off-by: "Russell King (Oracle)" Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R3R-00CszO-C0@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- drivers/base/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/init.c b/drivers/base/init.c index 397eb9880cec..c4954835128c 100644 --- a/drivers/base/init.c +++ b/drivers/base/init.c @@ -35,8 +35,8 @@ void __init driver_init(void) of_core_init(); platform_bus_init(); auxiliary_bus_init(); - cpu_dev_init(); memory_dev_init(); node_dev_init(); + cpu_dev_init(); container_dev_init(); } -- cgit From ca00f7d999a61383c3e5b2c66537a8e769dd7327 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:44:46 +0000 Subject: drivers: base: Print a warning instead of panic() when register_cpu() fails loongarch, mips, parisc, riscv and sh all print a warning if register_cpu() returns an error. Architectures that use GENERIC_CPU_DEVICES call panic() instead. Errors in this path indicate something is wrong with the firmware description of the platform, but the kernel is able to keep running. Downgrade this to a warning to make it easier to debug this issue. This will allow architectures that switching over to GENERIC_CPU_DEVICES to drop their warning, but keep the existing behaviour. Signed-off-by: James Morse Reviewed-by: "Russell King (Oracle)" Reviewed-by: Shaoqin Huang Signed-off-by: "Russell King (Oracle)" Reviewed-by: Gavin Shan Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R3W-00CszU-GM@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- drivers/base/cpu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 221ffbeb1c9b..82b6a76125f5 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -551,14 +551,15 @@ void __weak arch_unregister_cpu(int num) static void __init cpu_dev_register_generic(void) { - int i; + int i, ret; if (!IS_ENABLED(CONFIG_GENERIC_CPU_DEVICES)) return; for_each_present_cpu(i) { - if (arch_register_cpu(i)) - panic("Failed to register CPU device"); + ret = arch_register_cpu(i); + if (ret) + pr_warn("register_cpu %d failed (%d)\n", i, ret); } } -- cgit From d127db1a23c94a876557b5bf8ca8bea49e8debb6 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:44:51 +0000 Subject: arm64: setup: Switch over to GENERIC_CPU_DEVICES using arch_register_cpu() To allow ACPI's _STA value to hide CPUs that are present, but not available to online right now due to VMM or firmware policy, the register_cpu() call needs to be made by the ACPI machinery when ACPI is in use. This allows it to hide CPUs that are unavailable from sysfs. Switching to GENERIC_CPU_DEVICES is an intermediate step to allow all five ACPI architectures to be modified at once. Switch over to GENERIC_CPU_DEVICES, and provide an arch_register_cpu() that populates the hotpluggable flag. arch_register_cpu() is also the interface the ACPI machinery expects. The struct cpu in struct cpuinfo_arm64 is never used directly, remove it to use the one GENERIC_CPU_DEVICES provides. This changes the CPUs visible in sysfs from possible to present, but on arm64 smp_prepare_cpus() ensures these are the same. This patch also has the effect of moving the registration of CPUs from subsys to driver core initialisation, prior to any initcalls running. Signed-off-by: James Morse Reviewed-by: "Russell King (Oracle)" Reviewed-by: Shaoqin Huang Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R3b-00Csza-Ku@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/cpu.h | 1 - arch/arm64/kernel/setup.c | 13 ++++--------- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7b071a00425d..84bce830e365 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -132,6 +132,7 @@ config ARM64 select GENERIC_ARCH_TOPOLOGY select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_DEVICES select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index f3034099fd95..b1e43f56ee46 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -38,7 +38,6 @@ struct cpuinfo_32bit { }; struct cpuinfo_arm64 { - struct cpu cpu; struct kobject kobj; u64 reg_ctr; u64 reg_cntfrq; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 417a8a86b2db..165bd2c0dd5a 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -402,19 +402,14 @@ static inline bool cpu_can_disable(unsigned int cpu) return false; } -static int __init topology_init(void) +int arch_register_cpu(int num) { - int i; + struct cpu *cpu = &per_cpu(cpu_devices, num); - for_each_possible_cpu(i) { - struct cpu *cpu = &per_cpu(cpu_data.cpu, i); - cpu->hotpluggable = cpu_can_disable(i); - register_cpu(cpu, i); - } + cpu->hotpluggable = cpu_can_disable(num); - return 0; + return register_cpu(cpu, num); } -subsys_initcall(topology_init); static void dump_kernel_offset(void) { -- cgit From 092cfbc6b51143fd216bd55f8811f427d2ef0a0f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 21 Nov 2023 13:44:56 +0000 Subject: arm64: convert to arch_cpu_is_hotpluggable() Convert arm64 to use the arch_cpu_is_hotpluggable() helper rather than arch_register_cpu(). Reviewed-by: Shaoqin Huang Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R3g-00Cszg-PP@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/setup.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 165bd2c0dd5a..42c690bb2d60 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -402,13 +402,9 @@ static inline bool cpu_can_disable(unsigned int cpu) return false; } -int arch_register_cpu(int num) +bool arch_cpu_is_hotpluggable(int num) { - struct cpu *cpu = &per_cpu(cpu_devices, num); - - cpu->hotpluggable = cpu_can_disable(num); - - return register_cpu(cpu, num); + return cpu_can_disable(num); } static void dump_kernel_offset(void) -- cgit From 5b95f94c3b9f12adf27e970d411e8a744e95cabf Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:45:01 +0000 Subject: x86/topology: Switch over to GENERIC_CPU_DEVICES Now that GENERIC_CPU_DEVICES calls arch_register_cpu(), which can be overridden by the arch code, switch over to this to allow common code to choose when the register_cpu() call is made. x86's struct cpus come from struct x86_cpu, which has no other members or users. Remove this and use the version defined by common code. This is an intermediate step to the logic being moved to drivers/acpi, where GENERIC_CPU_DEVICES will do the work when booting with acpi=off. This patch also has the effect of moving the registration of CPUs from subsys to driver core initialisation, prior to any initcalls running. ---- Changes since RFC: * Fixed the second copy of arch_register_cpu() used for non-hotplug Changes since RFC v2: * Remove duplicate of the weak generic arch_register_cpu(), spotted by Jonathan Cameron. Add note about initialisation order change. Changes since RFC v3: * Adapt to removal of EXPORT_SYMBOL()s Signed-off-by: James Morse Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R3l-00Cszm-UA@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 1 + arch/x86/include/asm/cpu.h | 4 ---- arch/x86/kernel/topology.c | 27 ++++----------------------- 3 files changed, 5 insertions(+), 27 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index dbdcfc708369..8330c4ac26b3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -148,6 +148,7 @@ config X86 select GENERIC_CLOCKEVENTS_MIN_ADJUST select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_DEVICES select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_ENTRY diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index fecc4fe1d68a..f8f9a9b79395 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -23,10 +23,6 @@ static inline void prefill_possible_map(void) {} #endif /* CONFIG_SMP */ -struct x86_cpu { - struct cpu cpu; -}; - #ifdef CONFIG_HOTPLUG_CPU extern void soft_restart_cpu(void); #endif diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index fcb62cfdf946..c2ed3145a93b 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -35,36 +35,17 @@ #include #include -static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); - #ifdef CONFIG_HOTPLUG_CPU int arch_register_cpu(int cpu) { - struct x86_cpu *xc = per_cpu_ptr(&cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); - xc->cpu.hotpluggable = cpu > 0; - return register_cpu(&xc->cpu, cpu); + c->hotpluggable = cpu > 0; + return register_cpu(c, cpu); } void arch_unregister_cpu(int num) { - unregister_cpu(&per_cpu(cpu_devices, num).cpu); -} -#else /* CONFIG_HOTPLUG_CPU */ - -int __init arch_register_cpu(int num) -{ - return register_cpu(&per_cpu(cpu_devices, num).cpu, num); + unregister_cpu(&per_cpu(cpu_devices, num)); } #endif /* CONFIG_HOTPLUG_CPU */ - -static int __init topology_init(void) -{ - int i; - - for_each_present_cpu(i) - arch_register_cpu(i); - - return 0; -} -subsys_initcall(topology_init); -- cgit From b0b26bc580de555a504d7eed3866fca607bf1c1f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 21 Nov 2023 13:45:07 +0000 Subject: x86/topology: use weak version of arch_unregister_cpu() Since the x86 version of arch_unregister_cpu() is the same as the weak version, drop the x86 specific version. Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R3r-00Cszs-2R@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/topology.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index c2ed3145a93b..211863cb5b81 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -43,9 +43,4 @@ int arch_register_cpu(int cpu) c->hotpluggable = cpu > 0; return register_cpu(c, cpu); } - -void arch_unregister_cpu(int num) -{ - unregister_cpu(&per_cpu(cpu_devices, num)); -} #endif /* CONFIG_HOTPLUG_CPU */ -- cgit From e850a5c406450ae040d09c7b4161d6e75eff2a25 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 21 Nov 2023 13:45:12 +0000 Subject: x86/topology: convert to use arch_cpu_is_hotpluggable() Convert x86 to use the arch_cpu_is_hotpluggable() helper rather than arch_register_cpu(). Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R3w-00Cszy-6k@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/topology.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index 211863cb5b81..d42c28b8bfd8 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -36,11 +36,8 @@ #include #ifdef CONFIG_HOTPLUG_CPU -int arch_register_cpu(int cpu) +bool arch_cpu_is_hotpluggable(int cpu) { - struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); - - c->hotpluggable = cpu > 0; - return register_cpu(c, cpu); + return cpu > 0; } #endif /* CONFIG_HOTPLUG_CPU */ -- cgit From db3ba29a8315d89736b8af5c1ad945c9967d7655 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:45:17 +0000 Subject: LoongArch: Switch over to GENERIC_CPU_DEVICES Now that GENERIC_CPU_DEVICES calls arch_register_cpu(), which can be overridden by the arch code, switch over to this to allow common code to choose when the register_cpu() call is made. This allows topology_init() to be removed. This is an intermediate step to the logic being moved to drivers/acpi, where GENERIC_CPU_DEVICES will do the work when booting with acpi=off. This is a subtle change. Originally: - on boot, topology_init() would have marked present CPUs that io_master() is true for as hotplug-incapable. - if a CPU is hotplugged that is an io_master(), it can later be hot-unplugged. The new behaviour is that any CPU that io_master() is true for will now always be marked as hotplug-incapable, thus even if it was hotplugged, it can no longer be hot-unplugged. This patch also has the effect of moving the registration of CPUs from subsys to driver core initialisation, prior to any initcalls running. Signed-off-by: James Morse Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R41-00Ct04-Bg@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/Kconfig | 1 + arch/loongarch/kernel/topology.c | 29 ++--------------------------- 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 331becb2cb4f..15d05dd2b7f3 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -72,6 +72,7 @@ config LOONGARCH select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_DEVICES select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY select GENERIC_IOREMAP if !ARCH_IOREMAP diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c index ae860fe81536..7dfb46c68f58 100644 --- a/arch/loongarch/kernel/topology.c +++ b/arch/loongarch/kernel/topology.c @@ -10,20 +10,13 @@ #include -static DEFINE_PER_CPU(struct cpu, cpu_devices); - #ifdef CONFIG_HOTPLUG_CPU int arch_register_cpu(int cpu) { - int ret; struct cpu *c = &per_cpu(cpu_devices, cpu); - c->hotpluggable = 1; - ret = register_cpu(c, cpu); - if (ret < 0) - pr_warn("register_cpu %d failed (%d)\n", cpu, ret); - - return ret; + c->hotpluggable = !io_master(cpu); + return register_cpu(c, cpu); } void arch_unregister_cpu(int cpu) @@ -34,21 +27,3 @@ void arch_unregister_cpu(int cpu) unregister_cpu(c); } #endif - -static int __init topology_init(void) -{ - int i, ret; - - for_each_present_cpu(i) { - struct cpu *c = &per_cpu(cpu_devices, i); - - c->hotpluggable = !io_master(i); - ret = register_cpu(c, i); - if (ret < 0) - pr_warn("topology_init: register_cpu %d failed (%d)\n", i, ret); - } - - return 0; -} - -subsys_initcall(topology_init); -- cgit From 0d122fb60046cd888877a6029cc23863d4a1b9e3 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:45:22 +0000 Subject: LoongArch: Use the __weak version of arch_unregister_cpu() LoongArch provides its own arch_unregister_cpu(). This clears the hotpluggable flag, then unregisters the CPU. It isn't necessary to clear the hotpluggable flag when unregistering a cpu. unregister_cpu() writes NULL to the percpu cpu_sys_devices pointer, meaning cpu_is_hotpluggable() will return false, as get_cpu_device() has returned NULL. Remove arch_unregister_cpu() and use the __weak version. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R46-00Ct0A-GJ@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/topology.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c index 7dfb46c68f58..866c2c9ef6ab 100644 --- a/arch/loongarch/kernel/topology.c +++ b/arch/loongarch/kernel/topology.c @@ -18,12 +18,4 @@ int arch_register_cpu(int cpu) c->hotpluggable = !io_master(cpu); return register_cpu(c, cpu); } - -void arch_unregister_cpu(int cpu) -{ - struct cpu *c = &per_cpu(cpu_devices, cpu); - - c->hotpluggable = 0; - unregister_cpu(c); -} #endif -- cgit From 13f9f0361c2e64f0dadb7964bfad11bf0a6fb7ab Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 21 Nov 2023 13:45:27 +0000 Subject: LoongArch: convert to use arch_cpu_is_hotpluggable() Convert loongarch to use the arch_cpu_is_hotpluggable() helper rather than arch_register_cpu(). Also remove the export as nothing should be using arch_register_cpu() outside of the core kernel/acpi code. Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R4B-00Ct0G-Kk@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/topology.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/loongarch/kernel/topology.c b/arch/loongarch/kernel/topology.c index 866c2c9ef6ab..75d5c51a7cd3 100644 --- a/arch/loongarch/kernel/topology.c +++ b/arch/loongarch/kernel/topology.c @@ -11,11 +11,8 @@ #include #ifdef CONFIG_HOTPLUG_CPU -int arch_register_cpu(int cpu) +bool arch_cpu_is_hotpluggable(int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); - - c->hotpluggable = !io_master(cpu); - return register_cpu(c, cpu); + return !io_master(cpu); } #endif -- cgit From 96cf2036514acec9bee7c25ca61badc64820d734 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Nov 2023 13:45:32 +0000 Subject: riscv: Switch over to GENERIC_CPU_DEVICES Now that GENERIC_CPU_DEVICES calls arch_register_cpu(), which can be overridden by the arch code, switch over to this to allow common code to choose when the register_cpu() call is made. This allows topology_init() to be removed. This is an intermediate step to the logic being moved to drivers/acpi, where GENERIC_CPU_DEVICES will do the work when booting with acpi=off. This patch also has the effect of moving the registration of CPUs from subsys to driver core initialisation, prior to any initcalls running. Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Acked-by: Palmer Dabbelt Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Samuel Holland Tested-by: Samuel Holland Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R4G-00Ct0M-PS@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/riscv/Kconfig | 1 + arch/riscv/kernel/setup.c | 19 ++++--------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 95a2a06acc6a..162425cb9739 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -71,6 +71,7 @@ config RISCV select GENERIC_ARCH_TOPOLOGY select GENERIC_ATOMIC64 if !64BIT select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CPU_DEVICES select GENERIC_EARLY_IOREMAP select GENERIC_ENTRY select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 535a837de55d..b3a0aa2b78d5 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -51,7 +51,6 @@ atomic_t hart_lottery __section(".sdata") #endif ; unsigned long boot_cpu_hartid; -static DEFINE_PER_CPU(struct cpu, cpu_devices); /* * Place kernel memory regions on the resource tree so that @@ -299,23 +298,13 @@ void __init setup_arch(char **cmdline_p) riscv_user_isa_enable(); } -static int __init topology_init(void) +int arch_register_cpu(int cpu) { - int i, ret; + struct cpu *c = &per_cpu(cpu_devices, cpu); - for_each_possible_cpu(i) { - struct cpu *cpu = &per_cpu(cpu_devices, i); - - cpu->hotpluggable = cpu_has_hotplug(i); - ret = register_cpu(cpu, i); - if (unlikely(ret)) - pr_warn("Warning: %s: register_cpu %d failed (%d)\n", - __func__, i, ret); - } - - return 0; + c->hotpluggable = cpu_has_hotplug(cpu); + return register_cpu(c, cpu); } -subsys_initcall(topology_init); void free_initmem(void) { -- cgit From 00bf4641201092fc06aa51f7dcf45d1ea4d3d4f7 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 21 Nov 2023 13:45:37 +0000 Subject: riscv: convert to use arch_cpu_is_hotpluggable() Convert riscv to use the arch_cpu_is_hotpluggable() helper rather than arch_register_cpu(). Acked-by: Palmer Dabbelt Reviewed-by: Gavin Shan Signed-off-by: "Russell King (Oracle)" Reviewed-by: Jonathan Cameron Reviewed-by: Samuel Holland Tested-by: Samuel Holland # On HiFive Unmatched Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1r5R4L-00Ct0d-To@rmk-PC.armlinux.org.uk Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/setup.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index b3a0aa2b78d5..7493fafbe4cb 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -298,12 +298,9 @@ void __init setup_arch(char **cmdline_p) riscv_user_isa_enable(); } -int arch_register_cpu(int cpu) +bool arch_cpu_is_hotpluggable(int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); - - c->hotpluggable = cpu_has_hotplug(cpu); - return register_cpu(c, cpu); + return cpu_has_hotplug(cpu); } void free_initmem(void) -- cgit From 5bb03d0dd76700a830243776a99275575cbb2ee1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 1 Nov 2023 10:33:33 +0100 Subject: base: soc: Remove usage of the deprecated ida_simple_xx() API ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). This is less verbose. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/f0ef849446c9b3df7d6b16b1a58d089b4c17276c.1698831191.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/base/soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/soc.c b/drivers/base/soc.c index 8dec5228fde3..c741d0845852 100644 --- a/drivers/base/soc.c +++ b/drivers/base/soc.c @@ -106,7 +106,7 @@ static void soc_release(struct device *dev) { struct soc_device *soc_dev = container_of(dev, struct soc_device, dev); - ida_simple_remove(&soc_ida, soc_dev->soc_dev_num); + ida_free(&soc_ida, soc_dev->soc_dev_num); kfree(soc_dev->dev.groups); kfree(soc_dev); } @@ -155,7 +155,7 @@ struct soc_device *soc_device_register(struct soc_device_attribute *soc_dev_attr soc_attr_groups[1] = soc_dev_attr->custom_attr_group; /* Fetch a unique (reclaimable) SOC ID. */ - ret = ida_simple_get(&soc_ida, 0, 0, GFP_KERNEL); + ret = ida_alloc(&soc_ida, GFP_KERNEL); if (ret < 0) goto out3; soc_dev->soc_dev_num = ret; -- cgit From 48b5928e18dc27e05cab3dc4c78cd8a15baaf1e5 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Mon, 30 Oct 2023 00:42:39 -0400 Subject: base/node.c: initialize the accessor list before registering The current code registers the node as available in the node array before initializing the accessor list. This makes it so that anything which might access the accessor list as a result of allocations will cause an undefined memory access. In one example, an extension to access hmat data during interleave caused this undefined access as a result of a bulk allocation that occurs during node initialization but before the accessor list is initialized. Initialize the accessor list before making the node generally available to the global system. Fixes: 08d9dbe72b1f ("node: Link memory nodes to their compute nodes") Signed-off-by: Gregory Price Link: https://lore.kernel.org/r/20231030044239.971756-1-gregory.price@memverge.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/node.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/base/node.c b/drivers/base/node.c index 493d533f8375..4d588f4658c8 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -868,11 +868,15 @@ int __register_one_node(int nid) { int error; int cpu; + struct node *node; - node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL); - if (!node_devices[nid]) + node = kzalloc(sizeof(struct node), GFP_KERNEL); + if (!node) return -ENOMEM; + INIT_LIST_HEAD(&node->access_list); + node_devices[nid] = node; + error = register_node(node_devices[nid], nid); /* link cpu under this node */ @@ -881,7 +885,6 @@ int __register_one_node(int nid) register_cpu_under_node(cpu, nid); } - INIT_LIST_HEAD(&node_devices[nid]->access_list); node_init_caches(nid); return error; -- cgit From b17b70212dbf3f60ab95eb563dc165d235e75336 Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Mon, 30 Oct 2023 15:51:14 +0530 Subject: fs/sysfs/dir.c : Fix typo in comment Typo correction kboject => kobject Signed-off-by: Mukesh Ojha Link: https://lore.kernel.org/r/1698661274-32540-1-git-send-email-quic_mojha@quicinc.com Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index b6b6796e1616..4df2afa551dc 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -81,7 +81,7 @@ void sysfs_remove_dir(struct kobject *kobj) struct kernfs_node *kn = kobj->sd; /* - * In general, kboject owner is responsible for ensuring removal + * In general, kobject owner is responsible for ensuring removal * doesn't race with other operations and sysfs doesn't provide any * protection; however, when @kobj is used as a symlink target, the * symlinking entity usually doesn't own @kobj and thus has no -- cgit From 4c095734d92a46c243eca79b86ff3237fcce9a57 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 1 Nov 2023 10:36:04 +0100 Subject: software node: Remove usage of the deprecated ida_simple_xx() API ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). This is less verbose. Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/c7cdc3566c783d106138698b1e1923351fabace8.1698831275.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/base/swnode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 1886995a0b3a..f7c4317b2ccf 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -747,10 +747,10 @@ static void software_node_release(struct kobject *kobj) struct swnode *swnode = kobj_to_swnode(kobj); if (swnode->parent) { - ida_simple_remove(&swnode->parent->child_ids, swnode->id); + ida_free(&swnode->parent->child_ids, swnode->id); list_del(&swnode->entry); } else { - ida_simple_remove(&swnode_root_ids, swnode->id); + ida_free(&swnode_root_ids, swnode->id); } if (swnode->allocated) @@ -776,8 +776,8 @@ swnode_register(const struct software_node *node, struct swnode *parent, if (!swnode) return ERR_PTR(-ENOMEM); - ret = ida_simple_get(parent ? &parent->child_ids : &swnode_root_ids, - 0, 0, GFP_KERNEL); + ret = ida_alloc(parent ? &parent->child_ids : &swnode_root_ids, + GFP_KERNEL); if (ret < 0) { kfree(swnode); return ERR_PTR(ret); -- cgit From bef52aa0f3de1b7d8c258c13b16e577361dabf3a Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 9 Nov 2023 12:10:08 +0200 Subject: acpi: property: Let args be NULL in __acpi_node_get_property_reference fwnode_get_property_reference_args() may not be called with args argument NULL on ACPI, OF already supports this. Add the missing NULL checks and document this. The purpose is to be able to count the references. Fixes: 977d5ad39f3e ("ACPI: Convert ACPI reference args to generic fwnode reference args") Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231109101010.1329587-2-sakari.ailus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/property.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 6979a3f9f90a..4d042673d57b 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -852,6 +852,7 @@ static int acpi_get_ref_args(struct fwnode_reference_args *args, * @index: Index of the reference to return * @num_args: Maximum number of arguments after each reference * @args: Location to store the returned reference with optional arguments + * (may be NULL) * * Find property with @name, verifify that it is a package containing at least * one object reference and if so, store the ACPI device object pointer to the @@ -908,6 +909,9 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, if (!device) return -EINVAL; + if (!args) + return 0; + args->fwnode = acpi_fwnode_handle(device); args->nargs = 0; return 0; -- cgit From 1eaea4b3604eb9ca7d9a1e73d88fc121bb4061f5 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 9 Nov 2023 12:10:09 +0200 Subject: software node: Let args be NULL in software_node_get_reference_args fwnode_get_property_reference_args() may not be called with args argument NULL and while OF already supports this. Add the missing NULL check. The purpose is to be able to count the references. Fixes: b06184acf751 ("software node: Add software_node_get_reference_args()") Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231109101010.1329587-3-sakari.ailus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/swnode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index f7c4317b2ccf..36512fb75a20 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -541,6 +541,9 @@ software_node_get_reference_args(const struct fwnode_handle *fwnode, if (nargs > NR_FWNODE_REFERENCE_ARGS) return -EINVAL; + if (!args) + return 0; + args->fwnode = software_node_get(refnode); args->nargs = nargs; -- cgit From 3babbf614ae66018fa4f97865a7e3a3b8a590fb0 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Thu, 9 Nov 2023 12:10:10 +0200 Subject: device property: fwnode_property_get_reference_args allows NULL args now All three fwnode_property_get_reference_args() implemantations now allow args argument to be NULL. Document this. Signed-off-by: Sakari Ailus Reviewed-by: Andy Shevchenko Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20231109101010.1329587-4-sakari.ailus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/property.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/property.c b/drivers/base/property.c index 8c40abed7852..8667b13639d2 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -508,6 +508,7 @@ EXPORT_SYMBOL_GPL(fwnode_property_match_string); * @nargs: Number of arguments. Ignored if @nargs_prop is non-NULL. * @index: Index of the reference, from zero onwards. * @args: Result structure with reference and integer arguments. + * May be NULL. * * Obtain a reference based on a named property in an fwnode, with * integer arguments. -- cgit From 055467378bf14d6cc6d17e52dcfc76313b531bd7 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 13 Nov 2023 14:09:47 -0800 Subject: driver core: Enable fw_devlink=rpm by default fw_devlink=on has stabilized and is working correctly. Let's start using device links created by fw_devlink to also enforce runtime PM ordering. Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20231113220948.80089-1-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index e57467194104..6736c1de3ba4 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1641,7 +1641,7 @@ static void device_links_purge(struct device *dev) #define FW_DEVLINK_FLAGS_RPM (FW_DEVLINK_FLAGS_ON | \ DL_FLAG_PM_RUNTIME) -static u32 fw_devlink_flags = FW_DEVLINK_FLAGS_ON; +static u32 fw_devlink_flags = FW_DEVLINK_FLAGS_RPM; static int __init fw_devlink_setup(char *arg) { if (!arg) -- cgit From 7c41da586e9f45bf8842b4dca08681df8d586ebb Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 22 Nov 2023 10:33:33 +0100 Subject: driver core: Emit reason for pending deferred probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ending a boot log with platform 3f202000.mmc: deferred probe pending is already a nice hint about the problem. Sometimes there is a more detailed error indicator available, add that to the output. Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20231122093332.274145-2-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/base/dd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 0c3725c3eefa..85152537dbf1 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -313,7 +313,7 @@ static void deferred_probe_timeout_work_func(struct work_struct *work) mutex_lock(&deferred_probe_mutex); list_for_each_entry(p, &deferred_probe_pending_list, deferred_probe) - dev_info(p->device, "deferred probe pending\n"); + dev_info(p->device, "deferred probe pending: %s", p->deferred_probe_reason ?: "(reason unknown)\n"); mutex_unlock(&deferred_probe_mutex); fw_devlink_probing_done(); -- cgit From fe3de0102bc830e78d80dbf6e2dd29c520d68575 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Fri, 8 Dec 2023 10:33:09 +0100 Subject: kernel/cgroup: use kernfs_create_dir_ns() By passing the fsugid to kernfs_create_dir_ns(), we don't need cgroup_kn_set_ugid() any longer. That function was added for exactly this purpose by commit 49957f8e2a43 ("cgroup: newly created dirs and files should be owned by the creator"). Eliminating this piece of duplicate code means we benefit from future improvements to kernfs_create_dir_ns(); for example, both are lacking S_ISGID support currently, which my next patch will add to kernfs_create_dir_ns(). It cannot (easily) be added to cgroup_kn_set_ugid() because we can't dereference struct kernfs_iattrs from there. -- v1 -> v2: 12-digit commit id Signed-off-by: Max Kellermann Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20231208093310.297233-1-max.kellermann@ionos.com Signed-off-by: Greg Kroah-Hartman --- kernel/cgroup/cgroup.c | 31 ++++--------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 4b9ff41ca603..a844b421fd83 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4169,20 +4169,6 @@ static struct kernfs_ops cgroup_kf_ops = { .seq_show = cgroup_seqfile_show, }; -/* set uid and gid of cgroup dirs and files to that of the creator */ -static int cgroup_kn_set_ugid(struct kernfs_node *kn) -{ - struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID, - .ia_uid = current_fsuid(), - .ia_gid = current_fsgid(), }; - - if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) && - gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID)) - return 0; - - return kernfs_setattr(kn, &iattr); -} - static void cgroup_file_notify_timer(struct timer_list *timer) { cgroup_file_notify(container_of(timer, struct cgroup_file, @@ -4195,25 +4181,18 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, char name[CGROUP_FILE_NAME_MAX]; struct kernfs_node *kn; struct lock_class_key *key = NULL; - int ret; #ifdef CONFIG_DEBUG_LOCK_ALLOC key = &cft->lockdep_key; #endif kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name), cgroup_file_mode(cft), - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + current_fsuid(), current_fsgid(), 0, cft->kf_ops, cft, NULL, key); if (IS_ERR(kn)) return PTR_ERR(kn); - ret = cgroup_kn_set_ugid(kn); - if (ret) { - kernfs_remove(kn); - return ret; - } - if (cft->file_offset) { struct cgroup_file *cfile = (void *)css + cft->file_offset; @@ -5616,7 +5595,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, goto out_cancel_ref; /* create the directory */ - kn = kernfs_create_dir(parent->kn, name, mode, cgrp); + kn = kernfs_create_dir_ns(parent->kn, name, mode, + current_fsuid(), current_fsgid(), + cgrp, NULL); if (IS_ERR(kn)) { ret = PTR_ERR(kn); goto out_stat_exit; @@ -5761,10 +5742,6 @@ int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) */ kernfs_get(cgrp->kn); - ret = cgroup_kn_set_ugid(cgrp->kn); - if (ret) - goto out_destroy; - ret = css_populate_dir(&cgrp->self); if (ret) goto out_destroy; -- cgit From 5133bee62f0ea5d4c316d503cc0040cac5637601 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Fri, 8 Dec 2023 10:33:10 +0100 Subject: fs/kernfs/dir: obey S_ISGID Handling of S_ISGID is usually done by inode_init_owner() in all other filesystems, but kernfs doesn't use that function. In kernfs, struct kernfs_node is the primary data structure, and struct inode is only created from it on demand. Therefore, inode_init_owner() can't be used and we need to imitate its behavior. S_ISGID support is useful for the cgroup filesystem; it allows subtrees managed by an unprivileged process to retain a certain owner gid, which then enables sharing access to the subtree with another unprivileged process. -- v1 -> v2: minor coding style fix (comment) Signed-off-by: Max Kellermann Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20231208093310.297233-2-max.kellermann@ionos.com Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 8b2bd65d70e7..62d39ecf0a46 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -676,6 +676,18 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, { struct kernfs_node *kn; + if (parent->mode & S_ISGID) { + /* this code block imitates inode_init_owner() for + * kernfs + */ + + if (parent->iattr) + gid = parent->iattr->ia_gid; + + if (flags & KERNFS_DIR) + mode |= S_ISGID; + } + kn = __kernfs_new_node(kernfs_root(parent), parent, name, mode, uid, gid, flags); if (kn) { -- cgit From 2678fd2fe9ee2c569e9cb6b17e786bc8f0753538 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 7 Dec 2023 23:56:54 +0000 Subject: initramfs: Expose retained initrd as sysfs file When the kernel command line option "retain_initrd" is set, we do not free the initrd memory. However, we also don't expose it to anyone for consumption. That leaves us in a weird situation where the only user of this feature is ppc64 and arm64 specific kexec tooling. To make it more generally useful, this patch adds a kobject to the firmware object that contains the initrd context when "retain_initrd" is set. That way, we can access the initrd any time after boot from user space and for example hand it into kexec as --initrd parameter if we want to reboot the same initrd. Or inspect it directly locally. With this patch applied, there is a new /sys/firmware/initrd file when the kernel was booted with an initrd and "retain_initrd" command line option is set. Signed-off-by: Alexander Graf Tested-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20231207235654.16622-1-graf@amazon.com Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-firmware-initrd | 8 ++++++++ Documentation/admin-guide/kernel-parameters.txt | 5 +++-- init/initramfs.c | 18 +++++++++++++++++- 3 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-firmware-initrd diff --git a/Documentation/ABI/testing/sysfs-firmware-initrd b/Documentation/ABI/testing/sysfs-firmware-initrd new file mode 100644 index 000000000000..20bf7cf77a19 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-firmware-initrd @@ -0,0 +1,8 @@ +What: /sys/firmware/initrd +Date: December 2023 +Contact: Alexander Graf +Description: + When the kernel was booted with an initrd and the + "retain_initrd" option is set on the kernel command + line, /sys/firmware/initrd contains the contents of the + initrd that the kernel was booted with. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 65731b060e3f..51575cd31741 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2438,7 +2438,7 @@ between unregistering the boot console and initializing the real console. - keepinitrd [HW,ARM] + keepinitrd [HW,ARM] See retain_initrd. kernelcore= [KNL,X86,IA-64,PPC] Format: nn[KMGTPE] | nn% | "mirror" @@ -5580,7 +5580,8 @@ Useful for devices that are detected asynchronously (e.g. USB and MMC devices). - retain_initrd [RAM] Keep initrd memory after extraction + retain_initrd [RAM] Keep initrd memory after extraction. After boot, it will + be accessible via /sys/firmware/initrd. retbleed= [X86] Control mitigation of RETBleed (Arbitrary Speculative Code Execution with Return Instructions) diff --git a/init/initramfs.c b/init/initramfs.c index 8d0fd946cdd2..76deb48c38cb 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -574,6 +574,16 @@ extern unsigned long __initramfs_size; #include #include +static ssize_t raw_read(struct file *file, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t pos, size_t count) +{ + memcpy(buf, attr->private + pos, count); + return count; +} + +static BIN_ATTR(initrd, 0440, raw_read, NULL, 0); + void __init reserve_initrd_mem(void) { phys_addr_t start; @@ -715,8 +725,14 @@ done: * If the initrd region is overlapped with crashkernel reserved region, * free only memory that is not part of crashkernel region. */ - if (!do_retain_initrd && initrd_start && !kexec_free_initrd()) + if (!do_retain_initrd && initrd_start && !kexec_free_initrd()) { free_initrd_mem(initrd_start, initrd_end); + } else if (do_retain_initrd && initrd_start) { + bin_attr_initrd.size = initrd_end - initrd_start; + bin_attr_initrd.private = (void *)initrd_start; + if (sysfs_create_bin_file(firmware_kobj, &bin_attr_initrd)) + pr_err("Failed to create initrd sysfs file"); + } initrd_start = 0; initrd_end = 0; -- cgit From 792e04768efbf2a1b49a7162a9fa06c1fa584723 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 12 Dec 2023 13:17:38 -0800 Subject: kernfs: Convert kernfs_walk_ns() from strlcpy() to strscpy() strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated[1]. Additionally, it returns the size of the source string, not the resulting size of the destination string. In an effort to remove strlcpy() completely[2], replace strlcpy() here with strscpy(). Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [1] Link: https://github.com/KSPP/linux/issues/89 [2] Cc: Greg Kroah-Hartman Cc: Tejun Heo Cc: Azeem Shaikh Link: https://lore.kernel.org/r/20231116192127.1558276-1-keescook@chromium.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20231212211741.164376-1-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 62d39ecf0a46..fa9077576e02 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -862,16 +862,16 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, const unsigned char *path, const void *ns) { - size_t len; + ssize_t len; char *p, *name; lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem); spin_lock_irq(&kernfs_pr_cont_lock); - len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); + len = strscpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); - if (len >= sizeof(kernfs_pr_cont_buf)) { + if (len < 0) { spin_unlock_irq(&kernfs_pr_cont_lock); return NULL; } -- cgit From 5b56bf5cdb8b7c989055fe4d73fe3f409427d1d5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 12 Dec 2023 13:17:39 -0800 Subject: kernfs: Convert kernfs_name_locked() from strlcpy() to strscpy() strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated[1]. Additionally, it returns the size of the source string, not the resulting size of the destination string. In an effort to remove strlcpy() completely[2], replace strlcpy() here with strscpy(). Nothing actually checks the return value coming from kernfs_name_locked(), so this has no impact on error paths. The caller hierarchy is: kernfs_name_locked() kernfs_name() pr_cont_kernfs_name() return value ignored cgroup_name() current_css_set_cg_links_read() return value ignored print_page_owner_memcg() return value ignored Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [1] Link: https://github.com/KSPP/linux/issues/89 [2] Cc: Greg Kroah-Hartman Cc: Tejun Heo Cc: Azeem Shaikh Link: https://lore.kernel.org/r/20231116192127.1558276-2-keescook@chromium.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20231212211741.164376-2-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index fa9077576e02..d23bf7883b08 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -54,9 +54,9 @@ static bool kernfs_lockdep(struct kernfs_node *kn) static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) { if (!kn) - return strlcpy(buf, "(null)", buflen); + return strscpy(buf, "(null)", buflen); - return strlcpy(buf, kn->parent ? kn->name : "/", buflen); + return strscpy(buf, kn->parent ? kn->name : "/", buflen); } /* kernfs_node_depth - compute depth from @from to @to */ @@ -182,12 +182,12 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, * @buflen: size of @buf * * Copies the name of @kn into @buf of @buflen bytes. The behavior is - * similar to strlcpy(). + * similar to strscpy(). * * Fills buffer with "(null)" if @kn is %NULL. * - * Return: the length of @kn's name and if @buf isn't long enough, - * it's filled up to @buflen-1 and nul terminated. + * Return: the resulting length of @buf. If @buf isn't long enough, + * it's filled up to @buflen-1 and nul terminated, and returns -E2BIG. * * This function can be called from any context. */ -- cgit From ff6d413b0b59466e5acf2e42f294b1842ae130a1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 12 Dec 2023 13:17:40 -0800 Subject: kernfs: Convert kernfs_path_from_node_locked() from strlcpy() to strscpy() One of the last remaining users of strlcpy() in the kernel is kernfs_path_from_node_locked(), which passes back the problematic "length we _would_ have copied" return value to indicate truncation. Convert the chain of all callers to use the negative return value (some of which already doing this explicitly). All callers were already also checking for negative return values, so the risk to missed checks looks very low. In this analysis, it was found that cgroup1_release_agent() actually didn't handle the "too large" condition, so this is technically also a bug fix. :) Here's the chain of callers, and resolution identifying each one as now handling the correct return value: kernfs_path_from_node_locked() kernfs_path_from_node() pr_cont_kernfs_path() returns void kernfs_path() sysfs_warn_dup() return value ignored cgroup_path() blkg_path() bfq_bic_update_cgroup() return value ignored TRACE_IOCG_PATH() return value ignored TRACE_CGROUP_PATH() return value ignored perf_event_cgroup() return value ignored task_group_path() return value ignored damon_sysfs_memcg_path_eq() return value ignored get_mm_memcg_path() return value ignored lru_gen_seq_show() return value ignored cgroup_path_from_kernfs_id() return value ignored cgroup_show_path() already converted "too large" error to negative value cgroup_path_ns_locked() cgroup_path_ns() bpf_iter_cgroup_show_fdinfo() return value ignored cgroup1_release_agent() wasn't checking "too large" error proc_cgroup_show() already converted "too large" to negative value Cc: Greg Kroah-Hartman Cc: Tejun Heo Cc: Zefan Li Cc: Johannes Weiner Cc: Waiman Long Cc: Co-developed-by: Azeem Shaikh Signed-off-by: Azeem Shaikh Link: https://lore.kernel.org/r/20231116192127.1558276-3-keescook@chromium.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20231212211741.164376-3-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 34 +++++++++++++++++----------------- kernel/cgroup/cgroup-v1.c | 2 +- kernel/cgroup/cgroup.c | 4 ++-- kernel/cgroup/cpuset.c | 2 +- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index d23bf7883b08..bce1d7ac95ca 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -127,7 +127,7 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a, * * [3] when @kn_to is %NULL result will be "(null)" * - * Return: the length of the full path. If the full length is equal to or + * Return: the length of the constructed path. If the path would have been * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. */ @@ -138,16 +138,17 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, struct kernfs_node *kn, *common; const char parent_str[] = "/.."; size_t depth_from, depth_to, len = 0; + ssize_t copied; int i, j; if (!kn_to) - return strlcpy(buf, "(null)", buflen); + return strscpy(buf, "(null)", buflen); if (!kn_from) kn_from = kernfs_root(kn_to)->kn; if (kn_from == kn_to) - return strlcpy(buf, "/", buflen); + return strscpy(buf, "/", buflen); common = kernfs_common_ancestor(kn_from, kn_to); if (WARN_ON(!common)) @@ -158,18 +159,19 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, buf[0] = '\0'; - for (i = 0; i < depth_from; i++) - len += strlcpy(buf + len, parent_str, - len < buflen ? buflen - len : 0); + for (i = 0; i < depth_from; i++) { + copied = strscpy(buf + len, parent_str, buflen - len); + if (copied < 0) + return copied; + len += copied; + } /* Calculate how many bytes we need for the rest */ for (i = depth_to - 1; i >= 0; i--) { for (kn = kn_to, j = 0; j < i; j++) kn = kn->parent; - len += strlcpy(buf + len, "/", - len < buflen ? buflen - len : 0); - len += strlcpy(buf + len, kn->name, - len < buflen ? buflen - len : 0); + + len += scnprintf(buf + len, buflen - len, "/%s", kn->name); } return len; @@ -214,7 +216,7 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) * path (which includes '..'s) as needed to reach from @from to @to is * returned. * - * Return: the length of the full path. If the full length is equal to or + * Return: the length of the constructed path. If the path would have been * greater than @buflen, @buf contains the truncated path with the trailing * '\0'. On error, -errno is returned. */ @@ -265,12 +267,10 @@ void pr_cont_kernfs_path(struct kernfs_node *kn) sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); if (sz < 0) { - pr_cont("(error)"); - goto out; - } - - if (sz >= sizeof(kernfs_pr_cont_buf)) { - pr_cont("(name too long)"); + if (sz == -E2BIG) + pr_cont("(name too long)"); + else + pr_cont("(error)"); goto out; } diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 76db6c67e39a..9cb00ebe9ac6 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -802,7 +802,7 @@ void cgroup1_release_agent(struct work_struct *work) goto out_free; ret = cgroup_path_ns(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns); - if (ret < 0 || ret >= PATH_MAX) + if (ret < 0) goto out_free; argv[0] = agentbuf; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a844b421fd83..4bc8183b669f 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1893,7 +1893,7 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX); spin_unlock_irq(&css_set_lock); - if (len >= PATH_MAX) + if (len == -E2BIG) len = -ERANGE; else if (len > 0) { seq_escape(sf, buf, " \t\n\\"); @@ -6278,7 +6278,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) { retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX, current->nsproxy->cgroup_ns); - if (retval >= PATH_MAX) + if (retval == -E2BIG) retval = -ENAMETOOLONG; if (retval < 0) goto out_unlock; diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 615daaf87f1f..fb29158ae825 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -4941,7 +4941,7 @@ int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX, current->nsproxy->cgroup_ns); css_put(css); - if (retval >= PATH_MAX) + if (retval == -E2BIG) retval = -ENAMETOOLONG; if (retval < 0) goto out_free; -- cgit From 532888a59505da2a3fbb4abac6adad381cedb374 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 15 Dec 2023 18:45:41 +0100 Subject: driver core: Better advertise dev_err_probe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Describing the usage of dev_err_probe() as being (only?) "deemed acceptable" has a bad connotation. In fact dev_err_probe() fulfills three tasks: - handling of EPROBE_DEFER (even more than degrading to dev_dbg()) - symbolic output of the error code - return err for compact error code paths Advertise these better and claim the usage to be "fine" to get rid of the bad connotation. Acked-by: Rafael J. Wysocki Signed-off-by: Uwe Kleine-König Link: https://lore.kernel.org/r/20231215174540.2438601-2-u.kleine-koenig@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 6736c1de3ba4..14d46af40f9a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -4944,13 +4944,14 @@ define_dev_printk_level(_dev_info, KERN_INFO); * * return dev_err_probe(dev, err, ...); * - * Note that it is deemed acceptable to use this function for error - * prints during probe even if the @err is known to never be -EPROBE_DEFER. + * Using this helper in your probe function is totally fine even if @err is + * known to never be -EPROBE_DEFER. * The benefit compared to a normal dev_err() is the standardized format - * of the error code and the fact that the error code is returned. + * of the error code, it being emitted symbolically (i.e. you get "EAGAIN" + * instead of "-35") and the fact that the error code is returned which allows + * more compact error paths. * * Returns @err. - * */ int dev_err_probe(const struct device *dev, int err, const char *fmt, ...) { -- cgit From 64c166821e034dda14e7a1a2d648347662054e0e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Dec 2023 05:22:29 +0000 Subject: kernfs: d_obtain_alias(NULL) will do the right thing... Signed-off-by: Al Viro Link: https://lore.kernel.org/r/20231220052229.GH1674809@ZenIV Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/mount.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 4628edde2e7e..0c93cad0f0ac 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -125,9 +125,6 @@ static struct dentry *__kernfs_fh_to_dentry(struct super_block *sb, inode = kernfs_get_inode(sb, kn); kernfs_put(kn); - if (!inode) - return ERR_PTR(-ESTALE); - return d_obtain_alias(inode); } -- cgit From 5ae81209491ed3718fee798db6fb2cc81214824c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Dec 2023 14:52:36 +0100 Subject: driver core: bus: make bus_sort_breadthfirst() take a const pointer For some reason, during the big "clean up the driver core for a const struct bus_type" work, the bus_sort_breadthfirst() call was missed. Fix this up by changing the type to be a const * as it should be. Cc: Rafael J. Wysocki Link: https://lore.kernel.org/r/2023121935-stinking-ditzy-fd5d@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 2 +- include/linux/device/bus.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 84a21084d67d..33d952695927 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -1030,7 +1030,7 @@ static void device_insertion_sort_klist(struct device *a, struct list_head *list list_move_tail(&a->p->knode_bus.n_node, list); } -void bus_sort_breadthfirst(struct bus_type *bus, +void bus_sort_breadthfirst(const struct bus_type *bus, int (*compare)(const struct device *a, const struct device *b)) { diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h index ae10c4322754..25127f750349 100644 --- a/include/linux/device/bus.h +++ b/include/linux/device/bus.h @@ -232,7 +232,7 @@ bus_find_device_by_acpi_dev(const struct bus_type *bus, const void *adev) int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)); -void bus_sort_breadthfirst(struct bus_type *bus, +void bus_sort_breadthfirst(const struct bus_type *bus, int (*compare)(const struct device *a, const struct device *b)); /* -- cgit From 32f78abe59c740b6ec34c89dc10a09208eae7e1f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Dec 2023 14:15:09 +0100 Subject: driver core: bus: constantify subsys_register() calls The functions subsys_register() and subsys_virtual_register() should be taking a constant pointer to a struct bus_type, as they do not actually modify anything in it, so fix up the function definitions to do so properly. This also changes the pointer type in struct subsys_interface to be constant as well, as again, that's the proper signature of it. Cc: Rafael J. Wysocki Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/2023121908-grove-genetics-f8af@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 6 +++--- include/linux/device.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 33d952695927..daee55c9b2d9 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -1194,7 +1194,7 @@ static void system_root_device_release(struct device *dev) kfree(dev); } -static int subsys_register(struct bus_type *subsys, +static int subsys_register(const struct bus_type *subsys, const struct attribute_group **groups, struct kobject *parent_of_root) { @@ -1264,7 +1264,7 @@ err_sp: * directory itself and not some create fake root-device placed in * /sys/devices/system/. */ -int subsys_system_register(struct bus_type *subsys, +int subsys_system_register(const struct bus_type *subsys, const struct attribute_group **groups) { return subsys_register(subsys, groups, &system_kset->kobj); @@ -1282,7 +1282,7 @@ EXPORT_SYMBOL_GPL(subsys_system_register); * There's no restriction on device naming. This is for kernel software * constructs which need sysfs interface. */ -int subsys_virtual_register(struct bus_type *subsys, +int subsys_virtual_register(const struct bus_type *subsys, const struct attribute_group **groups) { struct kobject *virtual_dir; diff --git a/include/linux/device.h b/include/linux/device.h index 4aa34c8d1361..aefc5ca7f1cf 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -63,7 +63,7 @@ struct msi_device_data; */ struct subsys_interface { const char *name; - struct bus_type *subsys; + const struct bus_type *subsys; struct list_head node; int (*add_dev)(struct device *dev, struct subsys_interface *sif); void (*remove_dev)(struct device *dev, struct subsys_interface *sif); @@ -72,9 +72,9 @@ struct subsys_interface { int subsys_interface_register(struct subsys_interface *sif); void subsys_interface_unregister(struct subsys_interface *sif); -int subsys_system_register(struct bus_type *subsys, +int subsys_system_register(const struct bus_type *subsys, const struct attribute_group **groups); -int subsys_virtual_register(struct bus_type *subsys, +int subsys_virtual_register(const struct bus_type *subsys, const struct attribute_group **groups); /* -- cgit From dedb868994d8308c6c4650203e190ec619005806 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Dec 2023 16:03:20 +0100 Subject: driver core: container: make container_subsys const Now that the driver core can properly handle constant struct bus_type, move the container_subsys variable to be a constant structure as well, placing it into read-only memory which can not be modified at runtime. Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/2023121919-chatter-grumbling-9ef3@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/base/container.c | 2 +- include/linux/container.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/container.c b/drivers/base/container.c index 1ba42d2d3532..f40588ebc3f5 100644 --- a/drivers/base/container.c +++ b/drivers/base/container.c @@ -24,7 +24,7 @@ static int container_offline(struct device *dev) return cdev->offline ? cdev->offline(cdev) : 0; } -struct bus_type container_subsys = { +const struct bus_type container_subsys = { .name = CONTAINER_BUS_NAME, .dev_name = CONTAINER_BUS_NAME, .online = trivial_online, diff --git a/include/linux/container.h b/include/linux/container.h index 2566a1baa736..dd00cc918a92 100644 --- a/include/linux/container.h +++ b/include/linux/container.h @@ -12,7 +12,7 @@ #include /* drivers/base/power/container.c */ -extern struct bus_type container_subsys; +extern const struct bus_type container_subsys; struct container_dev { struct device dev; -- cgit From 580fc9c750fde7404f2d726637135fb785c67e86 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Dec 2023 16:35:09 +0100 Subject: driver core: mark remaining local bus_type variables as const Now that the driver core can properly handle constant struct bus_type, change the local driver core bus_type variables to be a constant structure as well, placing them into read-only memory which can not be modified at runtime. Cc: Ira Weiny Cc: Rafael J. Wysocki Cc: David Hildenbrand Cc: Oscar Salvador Cc: Kevin Hilman Cc: Ulf Hansson Cc: Len Brown Acked-by: William Breathitt Gray Acked-by: Dave Ertman Link: https://lore.kernel.org/r/2023121908-paver-follow-cc21@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/base/auxiliary.c | 2 +- drivers/base/isa.c | 2 +- drivers/base/memory.c | 2 +- drivers/base/node.c | 2 +- drivers/base/power/domain.c | 2 +- drivers/base/soc.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index 4d4c2c8d26c4..d3a2c40c2f12 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -244,7 +244,7 @@ static void auxiliary_bus_shutdown(struct device *dev) auxdrv->shutdown(auxdev); } -static struct bus_type auxiliary_bus_type = { +static const struct bus_type auxiliary_bus_type = { .name = "auxiliary", .probe = auxiliary_bus_probe, .remove = auxiliary_bus_remove, diff --git a/drivers/base/isa.c b/drivers/base/isa.c index 675ad3139224..e23d0b49a793 100644 --- a/drivers/base/isa.c +++ b/drivers/base/isa.c @@ -82,7 +82,7 @@ static int isa_bus_resume(struct device *dev) return 0; } -static struct bus_type isa_bus_type = { +static const struct bus_type isa_bus_type = { .name = "isa", .match = isa_bus_match, .probe = isa_bus_probe, diff --git a/drivers/base/memory.c b/drivers/base/memory.c index f3b9a4d0fa3b..4ac3266da6b5 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -68,7 +68,7 @@ static inline unsigned long phys_to_block_id(unsigned long phys) static int memory_subsys_online(struct device *dev); static int memory_subsys_offline(struct device *dev); -static struct bus_type memory_subsys = { +static const struct bus_type memory_subsys = { .name = MEMORY_CLASS_NAME, .dev_name = MEMORY_CLASS_NAME, .online = memory_subsys_online, diff --git a/drivers/base/node.c b/drivers/base/node.c index 4d588f4658c8..433897eecbdc 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -21,7 +21,7 @@ #include #include -static struct bus_type node_subsys = { +static const struct bus_type node_subsys = { .name = "node", .dev_name = "node", }; diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index da1777e39eaa..096871334cc7 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2670,7 +2670,7 @@ static void genpd_release_dev(struct device *dev) kfree(dev); } -static struct bus_type genpd_bus_type = { +static const struct bus_type genpd_bus_type = { .name = "genpd", }; diff --git a/drivers/base/soc.c b/drivers/base/soc.c index c741d0845852..282c38aece0d 100644 --- a/drivers/base/soc.c +++ b/drivers/base/soc.c @@ -28,7 +28,7 @@ struct soc_device { int soc_dev_num; }; -static struct bus_type soc_bus_type = { +static const struct bus_type soc_bus_type = { .name = "soc", }; static bool soc_bus_registered; -- cgit From 520adf3ba4a4bdd41450c57b17ef01f8a069fbfe Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 22 Dec 2023 21:05:22 -0800 Subject: driver core: class: fix Excess kernel-doc description warning Remove the @p: lines to prevent the kernel-doc warning: include/linux/device/class.h:72: warning: Excess struct member 'p' description in 'class' Signed-off-by: Randy Dunlap Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20231223050522.13867-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device/class.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/device/class.h b/include/linux/device/class.h index abf3d3bfb6fe..c576b49c55c2 100644 --- a/include/linux/device/class.h +++ b/include/linux/device/class.h @@ -40,8 +40,6 @@ struct fwnode_handle; * for the devices belonging to the class. Usually tied to * device's namespace. * @pm: The default device power management operations of this class. - * @p: The private data of the driver core, no one other than the - * driver core can touch this. * * A class is a higher-level view of a device that abstracts out low-level * implementation details. Drivers may see a SCSI disk or an ATA disk, but, -- cgit From ae4d90f7ca49eb71f8a3dca64d06d4c4e2193705 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 22 Dec 2023 21:05:32 -0800 Subject: driver core: device.h: fix Excess kernel-doc description warning Remove the @knode_class: line to prevent the kernel-doc warning: include/linux/device.h:807: warning: Excess struct member 'knode_class' description in 'device' Signed-off-by: Randy Dunlap Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20231223050532.13881-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/device.h b/include/linux/device.h index aefc5ca7f1cf..ed600dbf950e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -662,7 +662,6 @@ struct device_physical_location { * @id: device instance * @devres_lock: Spinlock to protect the resource of the device. * @devres_head: The resources list of the device. - * @knode_class: The node used to add the device to the class list. * @class: The class of the device. * @groups: Optional attribute groups. * @release: Callback to free the device after all references have -- cgit From c810729fe6471aa18e2b05bde4b7fb9d872b4ca0 Mon Sep 17 00:00:00 2001 From: Ahelenia Ziemiańska Date: Thu, 21 Dec 2023 15:15:42 +0100 Subject: kernfs: fix reference to renamed function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c637b8acbe079edb477d887041755b489036f146 ("kernfs: s/sysfs/kernfs/ in internal functions and whatever is left") renamed kernfs_file_open to kernfs_fop_open, but didn't update the comment referencing it. Signed-off-by: Ahelenia Ziemiańska Acked-by: Tejun Heo Link: https://lore.kernel.org/r/4f2wybrepigxjpuxj4bdkh3qmksetfioedit2bdrswf6b75ebb@tarta.nabijaczleweli.xyz Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index f0cb729e9a97..ffa4565c275a 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -447,7 +447,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) * warnings and we don't want to add spurious locking dependency * between the two. Check whether mmap is actually implemented * without grabbing @of->mutex by testing HAS_MMAP flag. See the - * comment in kernfs_file_open() for more details. + * comment in kernfs_fop_open() for more details. */ if (!(of->kn->flags & KERNFS_HAS_MMAP)) return -ENODEV; -- cgit From f36be9ce8146faabdbbf74ee0499edb2039c53a5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Dec 2023 14:13:10 +0100 Subject: EDAC: constantify the struct bus_type usage In many places in the edac code, struct bus_type pointers are passed around and then eventually sent to the driver core, which can handle a constant pointer. So constantify all of the edac usage of these as well because the data in them is never modified by the edac code either. Cc: Borislav Petkov Cc: Tony Luck Cc: James Morse Cc: Mauro Carvalho Chehab Cc: Robert Richter Cc: Link: https://lore.kernel.org/r/2023121909-tribute-punctuate-4b22@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/edac/edac_device.h | 2 +- drivers/edac/edac_device_sysfs.c | 2 +- drivers/edac/edac_module.c | 4 ++-- drivers/edac/edac_pci_sysfs.c | 2 +- include/linux/edac.h | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h index 3f44e6b9d387..7db22a4c83ef 100644 --- a/drivers/edac/edac_device.h +++ b/drivers/edac/edac_device.h @@ -176,7 +176,7 @@ struct edac_device_ctl_info { struct edac_dev_sysfs_attribute *sysfs_attributes; /* pointer to main 'edac' subsys in sysfs */ - struct bus_type *edac_subsys; + const struct bus_type *edac_subsys; /* the internal state of this controller instance */ int op_state; diff --git a/drivers/edac/edac_device_sysfs.c b/drivers/edac/edac_device_sysfs.c index 010c26be5846..237a542e045a 100644 --- a/drivers/edac/edac_device_sysfs.c +++ b/drivers/edac/edac_device_sysfs.c @@ -229,7 +229,7 @@ static struct kobj_type ktype_device_ctrl = { int edac_device_register_sysfs_main_kobj(struct edac_device_ctl_info *edac_dev) { struct device *dev_root; - struct bus_type *edac_subsys; + const struct bus_type *edac_subsys; int err = -ENODEV; edac_dbg(1, "\n"); diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index 32a931d0cb71..1c9f62382666 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -67,7 +67,7 @@ char *edac_op_state_to_string(int opstate) * sysfs object: /sys/devices/system/edac * need to export to other files */ -static struct bus_type edac_subsys = { +static const struct bus_type edac_subsys = { .name = "edac", .dev_name = "edac", }; @@ -90,7 +90,7 @@ static void edac_subsys_exit(void) } /* return pointer to the 'edac' node in sysfs */ -struct bus_type *edac_get_sysfs_subsys(void) +const struct bus_type *edac_get_sysfs_subsys(void) { return &edac_subsys; } diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 287cc51dbc86..e823e4da086a 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -338,7 +338,7 @@ static struct kobj_type ktype_edac_pci_main_kobj = { static int edac_pci_main_kobj_setup(void) { int err = -ENODEV; - struct bus_type *edac_subsys; + const struct bus_type *edac_subsys; struct device *dev_root; edac_dbg(0, "\n"); diff --git a/include/linux/edac.h b/include/linux/edac.h index fa4bda2a70f6..ccaf2ae0801d 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -30,7 +30,7 @@ struct device; extern int edac_op_state; -struct bus_type *edac_get_sysfs_subsys(void); +const struct bus_type *edac_get_sysfs_subsys(void); static inline void opstate_init(void) { @@ -492,7 +492,7 @@ struct edac_raw_error_desc { */ struct mem_ctl_info { struct device dev; - struct bus_type *bus; + const struct bus_type *bus; struct list_head link; /* for global list of mem_ctl_info structs */ -- cgit From db2292b01b799e926abfdbd6fafa1f27f0d0e457 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 19 Dec 2023 15:07:23 +0100 Subject: PM: clk: make pm_clk_add_notifier() take a const pointer The driver core wants to work with const struct bus_type, so there's no reason that pm_clk_add_notifier() should not also do the same thing, considering that it just passes the pointer off to the driver core which is expecting a const *. Cc: Rafael J. Wysocki Link: https://lore.kernel.org/r/2023121922-triumph-exploit-f545@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/clock_ops.c | 2 +- include/linux/pm_clock.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index 4110c19c08dc..e18ba676cdf6 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -793,7 +793,7 @@ static int pm_clk_notify(struct notifier_block *nb, * the remaining members of @clknb should be populated prior to calling this * routine. */ -void pm_clk_add_notifier(struct bus_type *bus, +void pm_clk_add_notifier(const struct bus_type *bus, struct pm_clk_notifier_block *clknb) { if (!bus || !clknb) diff --git a/include/linux/pm_clock.h b/include/linux/pm_clock.h index ada3a0ab10bf..68669ce18720 100644 --- a/include/linux/pm_clock.h +++ b/include/linux/pm_clock.h @@ -91,10 +91,10 @@ static inline int devm_pm_clk_create(struct device *dev) #endif #ifdef CONFIG_HAVE_CLK -extern void pm_clk_add_notifier(struct bus_type *bus, +extern void pm_clk_add_notifier(const struct bus_type *bus, struct pm_clk_notifier_block *clknb); #else -static inline void pm_clk_add_notifier(struct bus_type *bus, +static inline void pm_clk_add_notifier(const struct bus_type *bus, struct pm_clk_notifier_block *clknb) { } -- cgit From 93ec4a3b76404bce01bd5c9032bef5df6feb1d62 Mon Sep 17 00:00:00 2001 From: Jing Xia Date: Wed, 20 Dec 2023 10:46:03 +0800 Subject: class: fix use-after-free in class_register() The lock_class_key is still registered and can be found in lock_keys_hash hlist after subsys_private is freed in error handler path.A task who iterate over the lock_keys_hash later may cause use-after-free.So fix that up and unregister the lock_class_key before kfree(cp). On our platform, a driver fails to kset_register because of creating duplicate filename '/class/xxx'.With Kasan enabled, it prints a invalid-access bug report. KASAN bug report: BUG: KASAN: invalid-access in lockdep_register_key+0x19c/0x1bc Write of size 8 at addr 15ffff808b8c0368 by task modprobe/252 Pointer tag: [15], memory tag: [fe] CPU: 7 PID: 252 Comm: modprobe Tainted: G W 6.6.0-mainline-maybe-dirty #1 Call trace: dump_backtrace+0x1b0/0x1e4 show_stack+0x2c/0x40 dump_stack_lvl+0xac/0xe0 print_report+0x18c/0x4d8 kasan_report+0xe8/0x148 __hwasan_store8_noabort+0x88/0x98 lockdep_register_key+0x19c/0x1bc class_register+0x94/0x1ec init_module+0xbc/0xf48 [rfkill] do_one_initcall+0x17c/0x72c do_init_module+0x19c/0x3f8 ... Memory state around the buggy address: ffffff808b8c0100: 8a 8a 8a 8a 8a 8a 8a 8a 8a 8a 8a 8a 8a 8a 8a 8a ffffff808b8c0200: 8a 8a 8a 8a 8a 8a 8a 8a fe fe fe fe fe fe fe fe >ffffff808b8c0300: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe ^ ffffff808b8c0400: 03 03 03 03 03 03 03 03 03 03 03 03 03 03 03 03 As CONFIG_KASAN_GENERIC is not set, Kasan reports invalid-access not use-after-free here.In this case, modprobe is manipulating the corrupted lock_keys_hash hlish where lock_class_key is already freed before. It's worth noting that this only can happen if lockdep is enabled, which is not true for normal system. Fixes: dcfbb67e48a2 ("driver core: class: use lock_class_key already present in struct subsys_private") Cc: stable Signed-off-by: Jing Xia Signed-off-by: Xuewen Yan Link: https://lore.kernel.org/r/20231220024603.186078-1-jing.xia@unisoc.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/class.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/class.c b/drivers/base/class.c index 7e78aee0fd6c..7b38fdf8e1d7 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -213,6 +213,7 @@ int class_register(const struct class *cls) return 0; err_out: + lockdep_unregister_key(key); kfree(cp); return error; } -- cgit From c312828c37a72fe2d033a961c47c227b0767e9f8 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Fri, 29 Dec 2023 08:49:16 +0100 Subject: kernfs: convert kernfs_idr_lock to an irq safe raw spinlock bpf_cgroup_from_id() is basically a wrapper to cgroup_get_from_id(), that is relying on kernfs to determine the right cgroup associated to the target id. As a kfunc, it has the potential to be attached to any function through BPF, particularly in contexts where certain locks are held. However, kernfs is not using an irq safe spinlock for kernfs_idr_lock, that means any kernfs function that is acquiring this lock can be interrupted and potentially hit bpf_cgroup_from_id() in the process, triggering a deadlock. For example, it is really easy to trigger a lockdep splat between kernfs_idr_lock and rq->_lock, attaching a small BPF program to __set_cpus_allowed_ptr_locked() that just calls bpf_cgroup_from_id(): ===================================================== WARNING: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected 6.7.0-rc7-virtme #5 Not tainted ----------------------------------------------------- repro/131 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: ffffffffb2dc4578 (kernfs_idr_lock){+.+.}-{2:2}, at: kernfs_find_and_get_node_by_id+0x1d/0x80 and this task is already holding: ffff911cbecaf218 (&rq->__lock){-.-.}-{2:2}, at: task_rq_lock+0x50/0xc0 which would create a new lock dependency: (&rq->__lock){-.-.}-{2:2} -> (kernfs_idr_lock){+.+.}-{2:2} but this new dependency connects a HARDIRQ-irq-safe lock: (&rq->__lock){-.-.}-{2:2} ... which became HARDIRQ-irq-safe at: lock_acquire+0xbf/0x2b0 _raw_spin_lock_nested+0x2e/0x40 scheduler_tick+0x5d/0x170 update_process_times+0x9c/0xb0 tick_periodic+0x27/0xe0 tick_handle_periodic+0x24/0x70 __sysvec_apic_timer_interrupt+0x64/0x1a0 sysvec_apic_timer_interrupt+0x6f/0x80 asm_sysvec_apic_timer_interrupt+0x1a/0x20 memcpy+0xc/0x20 arch_dup_task_struct+0x15/0x30 copy_process+0x1ce/0x1eb0 kernel_clone+0xac/0x390 kernel_thread+0x6f/0xa0 kthreadd+0x199/0x230 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x1b/0x30 to a HARDIRQ-irq-unsafe lock: (kernfs_idr_lock){+.+.}-{2:2} ... which became HARDIRQ-irq-unsafe at: ... lock_acquire+0xbf/0x2b0 _raw_spin_lock+0x30/0x40 __kernfs_new_node.isra.0+0x83/0x280 kernfs_create_root+0xf6/0x1d0 sysfs_init+0x1b/0x70 mnt_init+0xd9/0x2a0 vfs_caches_init+0xcf/0xe0 start_kernel+0x58a/0x6a0 x86_64_start_reservations+0x18/0x30 x86_64_start_kernel+0xc5/0xe0 secondary_startup_64_no_verify+0x178/0x17b other info that might help us debug this: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(kernfs_idr_lock); local_irq_disable(); lock(&rq->__lock); lock(kernfs_idr_lock); lock(&rq->__lock); *** DEADLOCK *** Prevent this deadlock condition converting kernfs_idr_lock to a raw irq safe spinlock. The performance impact of this change should be negligible and it also helps to prevent similar deadlock conditions with any other subsystems that may depend on kernfs. Fixes: 332ea1f697be ("bpf: Add bpf_cgroup_from_id() kfunc") Cc: stable Signed-off-by: Andrea Righi Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20231229074916.53547-1-andrea.righi@canonical.com Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index bce1d7ac95ca..a62960fdf73f 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -27,7 +27,7 @@ static DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */ */ static DEFINE_SPINLOCK(kernfs_pr_cont_lock); static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ -static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ +static DEFINE_RAW_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) @@ -539,6 +539,7 @@ void kernfs_put(struct kernfs_node *kn) { struct kernfs_node *parent; struct kernfs_root *root; + unsigned long flags; if (!kn || !atomic_dec_and_test(&kn->count)) return; @@ -563,9 +564,9 @@ void kernfs_put(struct kernfs_node *kn) simple_xattrs_free(&kn->iattr->xattrs, NULL); kmem_cache_free(kernfs_iattrs_cache, kn->iattr); } - spin_lock(&kernfs_idr_lock); + raw_spin_lock_irqsave(&kernfs_idr_lock, flags); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); - spin_unlock(&kernfs_idr_lock); + raw_spin_unlock_irqrestore(&kernfs_idr_lock, flags); kmem_cache_free(kernfs_node_cache, kn); kn = parent; @@ -607,6 +608,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, struct kernfs_node *kn; u32 id_highbits; int ret; + unsigned long irqflags; name = kstrdup_const(name, GFP_KERNEL); if (!name) @@ -617,13 +619,13 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, goto err_out1; idr_preload(GFP_KERNEL); - spin_lock(&kernfs_idr_lock); + raw_spin_lock_irqsave(&kernfs_idr_lock, irqflags); ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC); if (ret >= 0 && ret < root->last_id_lowbits) root->id_highbits++; id_highbits = root->id_highbits; root->last_id_lowbits = ret; - spin_unlock(&kernfs_idr_lock); + raw_spin_unlock_irqrestore(&kernfs_idr_lock, irqflags); idr_preload_end(); if (ret < 0) goto err_out2; @@ -659,9 +661,9 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, return kn; err_out3: - spin_lock(&kernfs_idr_lock); + raw_spin_lock_irqsave(&kernfs_idr_lock, irqflags); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); - spin_unlock(&kernfs_idr_lock); + raw_spin_unlock_irqrestore(&kernfs_idr_lock, irqflags); err_out2: kmem_cache_free(kernfs_node_cache, kn); err_out1: @@ -714,8 +716,9 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, struct kernfs_node *kn; ino_t ino = kernfs_id_ino(id); u32 gen = kernfs_id_gen(id); + unsigned long flags; - spin_lock(&kernfs_idr_lock); + raw_spin_lock_irqsave(&kernfs_idr_lock, flags); kn = idr_find(&root->ino_idr, (u32)ino); if (!kn) @@ -739,10 +742,10 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count))) goto err_unlock; - spin_unlock(&kernfs_idr_lock); + raw_spin_unlock_irqrestore(&kernfs_idr_lock, flags); return kn; err_unlock: - spin_unlock(&kernfs_idr_lock); + raw_spin_unlock_irqrestore(&kernfs_idr_lock, flags); return NULL; } -- cgit From e3977e0609a07d86406029fceea0fd40d7849368 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 9 Jan 2024 11:48:02 -1000 Subject: Revert "kernfs: convert kernfs_idr_lock to an irq safe raw spinlock" This reverts commit dad3fb67ca1cbef87ce700e83a55835e5921ce8a. The commit converted kernfs_idr_lock to an IRQ-safe raw_spinlock because it could be acquired while holding an rq lock through bpf_cgroup_from_id(). However, kernfs_idr_lock is held while doing GPF_NOWAIT allocations which involves acquiring an non-IRQ-safe and non-raw lock leading to the following lockdep warning: ============================= [ BUG: Invalid wait context ] 6.7.0-rc5-kzm9g-00251-g655022a45b1c #578 Not tainted ----------------------------- swapper/0/0 is trying to lock: dfbcd488 (&c->lock){....}-{3:3}, at: local_lock_acquire+0x0/0xa4 other info that might help us debug this: context-{5:5} 2 locks held by swapper/0/0: #0: dfbc9c60 (lock){+.+.}-{3:3}, at: local_lock_acquire+0x0/0xa4 #1: c0c012a8 (kernfs_idr_lock){....}-{2:2}, at: __kernfs_new_node.constprop.0+0x68/0x258 stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.7.0-rc5-kzm9g-00251-g655022a45b1c #578 Hardware name: Generic SH73A0 (Flattened Device Tree) unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x68/0x90 dump_stack_lvl from __lock_acquire+0x3cc/0x168c __lock_acquire from lock_acquire+0x274/0x30c lock_acquire from local_lock_acquire+0x28/0xa4 local_lock_acquire from ___slab_alloc+0x234/0x8a8 ___slab_alloc from __slab_alloc.constprop.0+0x30/0x44 __slab_alloc.constprop.0 from kmem_cache_alloc+0x7c/0x148 kmem_cache_alloc from radix_tree_node_alloc.constprop.0+0x44/0xdc radix_tree_node_alloc.constprop.0 from idr_get_free+0x110/0x2b8 idr_get_free from idr_alloc_u32+0x9c/0x108 idr_alloc_u32 from idr_alloc_cyclic+0x50/0xb8 idr_alloc_cyclic from __kernfs_new_node.constprop.0+0x88/0x258 __kernfs_new_node.constprop.0 from kernfs_create_root+0xbc/0x154 kernfs_create_root from sysfs_init+0x18/0x5c sysfs_init from mnt_init+0xc4/0x220 mnt_init from vfs_caches_init+0x6c/0x88 vfs_caches_init from start_kernel+0x474/0x528 start_kernel from 0x0 Let's rever the commit. It's undesirable to spread out raw spinlock usage anyway and the problem can be solved by protecting the lookup path with RCU instead. Signed-off-by: Tejun Heo Cc: Andrea Righi Reported-by: Geert Uytterhoeven Link: http://lkml.kernel.org/r/CAMuHMdV=AKt+mwY7svEq5gFPx41LoSQZ_USME5_MEdWQze13ww@mail.gmail.com Link: https://lore.kernel.org/r/20240109214828.252092-2-tj@kernel.org Tested-by: Andrea Righi Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/dir.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index a62960fdf73f..bce1d7ac95ca 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -27,7 +27,7 @@ static DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */ */ static DEFINE_SPINLOCK(kernfs_pr_cont_lock); static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ -static DEFINE_RAW_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ +static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) @@ -539,7 +539,6 @@ void kernfs_put(struct kernfs_node *kn) { struct kernfs_node *parent; struct kernfs_root *root; - unsigned long flags; if (!kn || !atomic_dec_and_test(&kn->count)) return; @@ -564,9 +563,9 @@ void kernfs_put(struct kernfs_node *kn) simple_xattrs_free(&kn->iattr->xattrs, NULL); kmem_cache_free(kernfs_iattrs_cache, kn->iattr); } - raw_spin_lock_irqsave(&kernfs_idr_lock, flags); + spin_lock(&kernfs_idr_lock); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); - raw_spin_unlock_irqrestore(&kernfs_idr_lock, flags); + spin_unlock(&kernfs_idr_lock); kmem_cache_free(kernfs_node_cache, kn); kn = parent; @@ -608,7 +607,6 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, struct kernfs_node *kn; u32 id_highbits; int ret; - unsigned long irqflags; name = kstrdup_const(name, GFP_KERNEL); if (!name) @@ -619,13 +617,13 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, goto err_out1; idr_preload(GFP_KERNEL); - raw_spin_lock_irqsave(&kernfs_idr_lock, irqflags); + spin_lock(&kernfs_idr_lock); ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC); if (ret >= 0 && ret < root->last_id_lowbits) root->id_highbits++; id_highbits = root->id_highbits; root->last_id_lowbits = ret; - raw_spin_unlock_irqrestore(&kernfs_idr_lock, irqflags); + spin_unlock(&kernfs_idr_lock); idr_preload_end(); if (ret < 0) goto err_out2; @@ -661,9 +659,9 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, return kn; err_out3: - raw_spin_lock_irqsave(&kernfs_idr_lock, irqflags); + spin_lock(&kernfs_idr_lock); idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); - raw_spin_unlock_irqrestore(&kernfs_idr_lock, irqflags); + spin_unlock(&kernfs_idr_lock); err_out2: kmem_cache_free(kernfs_node_cache, kn); err_out1: @@ -716,9 +714,8 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, struct kernfs_node *kn; ino_t ino = kernfs_id_ino(id); u32 gen = kernfs_id_gen(id); - unsigned long flags; - raw_spin_lock_irqsave(&kernfs_idr_lock, flags); + spin_lock(&kernfs_idr_lock); kn = idr_find(&root->ino_idr, (u32)ino); if (!kn) @@ -742,10 +739,10 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, if (unlikely(!__kernfs_active(kn) || !atomic_inc_not_zero(&kn->count))) goto err_unlock; - raw_spin_unlock_irqrestore(&kernfs_idr_lock, flags); + spin_unlock(&kernfs_idr_lock); return kn; err_unlock: - raw_spin_unlock_irqrestore(&kernfs_idr_lock, flags); + spin_unlock(&kernfs_idr_lock); return NULL; } -- cgit