Diffstat (limited to 'kernel/cpu.c')
 kernel/cpu.c | 402 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 383 insertions(+), 19 deletions(-)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f4a2c5845bcb..88a7ede322bd 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -17,6 +17,7 @@
 #include <linux/cpu.h>
 #include <linux/oom.h>
 #include <linux/rcupdate.h>
+#include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/bug.h>
 #include <linux/kthread.h>
@@ -59,6 +60,7 @@
  * @last:	For multi-instance rollback, remember how far we got
  * @cb_state:	The state for a single callback (install/uninstall)
  * @result:	Result of the operation
+ * @ap_sync_state:	State for AP synchronization
  * @done_up:	Signal completion to the issuer of the task for cpu-up
  * @done_down:	Signal completion to the issuer of the task for cpu-down
  */
@@ -76,6 +78,7 @@ struct cpuhp_cpu_state {
 	struct hlist_node	*last;
 	enum cpuhp_state	cb_state;
 	int			result;
+	atomic_t		ap_sync_state;
 	struct completion	done_up;
 	struct completion	done_down;
 #endif
@@ -276,6 +279,182 @@ static bool cpuhp_is_atomic_state(enum cpuhp_state state)
 	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
 }
 
+/* Synchronization state management */
+enum cpuhp_sync_state {
+	SYNC_STATE_DEAD,
+	SYNC_STATE_KICKED,
+	SYNC_STATE_SHOULD_DIE,
+	SYNC_STATE_ALIVE,
+	SYNC_STATE_SHOULD_ONLINE,
+	SYNC_STATE_ONLINE,
+};
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC
+/**
+ * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown
+ * @state:	The synchronization state to set
+ *
+ * No synchronization point. Just update of the synchronization state, but implies
+ * a full barrier so that the AP changes are visible before the control CPU proceeds.
+ */
+static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state)
+{
+	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
+
+	(void)atomic_xchg(st, state);
+}
+
+void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); }
+
+static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state,
+				      enum cpuhp_sync_state next_state)
+{
+	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+	ktime_t now, end, start = ktime_get();
+	int sync;
+
+	end = start + 10ULL * NSEC_PER_SEC;
+
+	sync = atomic_read(st);
+	while (1) {
+		if (sync == state) {
+			if (!atomic_try_cmpxchg(st, &sync, next_state))
+				continue;
+			return true;
+		}
+
+		now = ktime_get();
+		if (now > end) {
+			/* Timeout. Leave the state unchanged */
+			return false;
+		} else if (now - start < NSEC_PER_MSEC) {
+			/* Poll for one millisecond */
+			arch_cpuhp_sync_state_poll();
+		} else {
+			usleep_range_state(USEC_PER_MSEC, 2 * USEC_PER_MSEC, TASK_UNINTERRUPTIBLE);
+		}
+		sync = atomic_read(st);
+	}
+	return true;
+}
+#else  /* CONFIG_HOTPLUG_CORE_SYNC */
+static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { }
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC */
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD
+/**
+ * cpuhp_ap_report_dead - Update synchronization state to DEAD
+ *
+ * No synchronization point. Just update of the synchronization state.
+ */
+void cpuhp_ap_report_dead(void)
+{
+	cpuhp_ap_update_sync_state(SYNC_STATE_DEAD);
+}
+
+void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { }
+
+/*
+ * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down
+ * because the AP cannot issue complete() at this stage.
+ */
+static void cpuhp_bp_sync_dead(unsigned int cpu)
+{
+	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+	int sync = atomic_read(st);
+
+	do {
+		/* CPU can have reported dead already. Don't overwrite that! */
+		if (sync == SYNC_STATE_DEAD)
+			break;
+	} while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE));
+
+	if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) {
+		/* CPU reached dead state. Invoke the cleanup function */
+		arch_cpuhp_cleanup_dead_cpu(cpu);
+		return;
+	}
+
+	/* No further action possible. Emit message and give up. */
+	pr_err("CPU%u failed to report dead state\n", cpu);
+}
+#else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */
+static inline void cpuhp_bp_sync_dead(unsigned int cpu) { }
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */
+
+#ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL
+/**
+ * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive
+ *
+ * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits
+ * for the BP to release it.
+ */
+void cpuhp_ap_sync_alive(void)
+{
+	atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state);
+
+	cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE);
+
+	/* Wait for the control CPU to release it. */
+	while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE)
+		cpu_relax();
+}
+
+static bool cpuhp_can_boot_ap(unsigned int cpu)
+{
+	atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu);
+	int sync = atomic_read(st);
+
+again:
+	switch (sync) {
+	case SYNC_STATE_DEAD:
+		/* CPU is properly dead */
+		break;
+	case SYNC_STATE_KICKED:
+		/* CPU did not come up in previous attempt */
+		break;
+	case SYNC_STATE_ALIVE:
+		/* CPU is stuck in cpuhp_ap_sync_alive(). */
+		break;
+	default:
+		/* CPU failed to report online or dead and is in limbo state. */
+		return false;
+	}
+
+	/* Prepare for booting */
+	if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED))
+		goto again;
+
+	return true;
+}
+
+void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { }
+
+/*
+ * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up
+ * because the AP cannot issue complete() so early in the bringup.
+ */
+static int cpuhp_bp_sync_alive(unsigned int cpu)
+{
+	int ret = 0;
+
+	if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL))
+		return 0;
+
+	if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) {
+		pr_err("CPU%u failed to report alive state\n", cpu);
+		ret = -EIO;
+	}
+
+	/* Let the architecture cleanup the kick alive mechanics. */
+	arch_cpuhp_cleanup_kick_cpu(cpu);
+	return ret;
+}
+#else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */
+static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; }
+static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; }
+#endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */
+
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
 static DEFINE_MUTEX(cpu_add_remove_lock);
 bool cpuhp_tasks_frozen;
@@ -470,8 +649,23 @@ bool cpu_smt_possible(void)
 		cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
 }
 EXPORT_SYMBOL_GPL(cpu_smt_possible);
+
+static inline bool cpuhp_smt_aware(void)
+{
+	return topology_smt_supported();
+}
+
+static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
+{
+	return cpu_primary_thread_mask;
+}
 #else
 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
+static inline bool cpuhp_smt_aware(void) { return false; }
+static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
+{
+	return cpu_present_mask;
+}
 #endif
 
 static inline enum cpuhp_state
@@ -558,7 +752,7 @@ static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
 	return ret;
 }
 
-static int bringup_wait_for_ap(unsigned int cpu)
+static int bringup_wait_for_ap_online(unsigned int cpu)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 
@@ -579,38 +773,94 @@ static int bringup_wait_for_ap(unsigned int cpu)
 	 */
 	if (!cpu_smt_allowed(cpu))
 		return -ECANCELED;
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
+static int cpuhp_kick_ap_alive(unsigned int cpu)
+{
+	if (!cpuhp_can_boot_ap(cpu))
+		return -EAGAIN;
+
+	return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu));
+}
+
+static int cpuhp_bringup_ap(unsigned int cpu)
+{
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+	int ret;
+
+	/*
+	 * Some architectures have to walk the irq descriptors to
+	 * setup the vector space for the cpu which comes online.
+	 * Prevent irq alloc/free across the bringup.
+	 */
+	irq_lock_sparse();
+
+	ret = cpuhp_bp_sync_alive(cpu);
+	if (ret)
+		goto out_unlock;
+
+	ret = bringup_wait_for_ap_online(cpu);
+	if (ret)
+		goto out_unlock;
+
+	irq_unlock_sparse();
 
 	if (st->target <= CPUHP_AP_ONLINE_IDLE)
 		return 0;
 
 	return cpuhp_kick_ap(cpu, st, st->target);
-}
 
+out_unlock:
+	irq_unlock_sparse();
+	return ret;
+}
+#else
 static int bringup_cpu(unsigned int cpu)
 {
+	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	struct task_struct *idle = idle_thread_get(cpu);
 	int ret;
 
-	/*
-	 * Reset stale stack state from the last time this CPU was online.
-	 */
-	scs_task_reset(idle);
-	kasan_unpoison_task_stack(idle);
+	if (!cpuhp_can_boot_ap(cpu))
+		return -EAGAIN;
 
 	/*
 	 * Some architectures have to walk the irq descriptors to
 	 * setup the vector space for the cpu which comes online.
-	 * Prevent irq alloc/free across the bringup.
+	 *
+	 * Prevent irq alloc/free across the bringup by acquiring the
	 * sparse irq lock. Hold it until the upcoming CPU completes the
+	 * startup in cpuhp_online_idle() which allows to avoid
+	 * intermediate synchronization points in the architecture code.
 	 */
 	irq_lock_sparse();
 
-	/* Arch-specific enabling code. */
 	ret = __cpu_up(cpu, idle);
-	irq_unlock_sparse();
 	if (ret)
-		return ret;
-	return bringup_wait_for_ap(cpu);
+		goto out_unlock;
+
+	ret = cpuhp_bp_sync_alive(cpu);
+	if (ret)
+		goto out_unlock;
+
+	ret = bringup_wait_for_ap_online(cpu);
+	if (ret)
+		goto out_unlock;
+
+	irq_unlock_sparse();
+
+	if (st->target <= CPUHP_AP_ONLINE_IDLE)
+		return 0;
+
+	return cpuhp_kick_ap(cpu, st, st->target);
+
+out_unlock:
+	irq_unlock_sparse();
+	return ret;
 }
+#endif
 
 static int finish_cpu(unsigned int cpu)
 {
@@ -1099,6 +1349,8 @@ static int takedown_cpu(unsigned int cpu)
 	/* This actually kills the CPU. */
 	__cpu_die(cpu);
 
+	cpuhp_bp_sync_dead(cpu);
+
 	tick_cleanup_dead_cpu(cpu);
 	rcutree_migrate_callbacks(cpu);
 	return 0;
@@ -1345,8 +1597,10 @@ void cpuhp_online_idle(enum cpuhp_state state)
 	if (state != CPUHP_AP_ONLINE_IDLE)
 		return;
 
+	cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE);
+
 	/*
-	 * Unpart the stopper thread before we start the idle loop (and start
+	 * Unpark the stopper thread before we start the idle loop (and start
 	 * scheduling); this ensures the stopper task is always available.
 	 */
 	stop_machine_unpark(smp_processor_id());
@@ -1383,6 +1637,12 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
 			ret = PTR_ERR(idle);
 			goto out;
 		}
+
+		/*
+		 * Reset stale stack state from the last time this CPU was online.
+		 */
+		scs_task_reset(idle);
+		kasan_unpoison_task_stack(idle);
 	}
 
 	cpuhp_tasks_frozen = tasks_frozen;
@@ -1502,18 +1762,96 @@ int bringup_hibernate_cpu(unsigned int sleep_cpu)
 	return 0;
 }
 
-void bringup_nonboot_cpus(unsigned int setup_max_cpus)
+static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus,
+				      enum cpuhp_state target)
 {
 	unsigned int cpu;
 
-	for_each_present_cpu(cpu) {
-		if (num_online_cpus() >= setup_max_cpus)
+	for_each_cpu(cpu, mask) {
+		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+
+		if (cpu_up(cpu, target) && can_rollback_cpu(st)) {
+			/*
+			 * If this failed then cpu_up() might have only
+			 * rolled back to CPUHP_BP_KICK_AP for the final
+			 * online. Clean it up. NOOP if already rolled back.
+			 */
+			WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE));
+		}
+
+		if (!--ncpus)
 			break;
-		if (!cpu_online(cpu))
-			cpu_up(cpu, CPUHP_ONLINE);
 	}
 }
 
+#ifdef CONFIG_HOTPLUG_PARALLEL
+static bool __cpuhp_parallel_bringup __ro_after_init = true;
+
+static int __init parallel_bringup_parse_param(char *arg)
+{
+	return kstrtobool(arg, &__cpuhp_parallel_bringup);
+}
+early_param("cpuhp.parallel", parallel_bringup_parse_param);
+
+/*
+ * On architectures which have enabled parallel bringup this invokes all BP
+ * prepare states for each of the to be onlined APs first. The last state
+ * sends the startup IPI to the APs. The APs proceed through the low level
+ * bringup code in parallel and then wait for the control CPU to release
+ * them one by one for the final onlining procedure.
+ *
+ * This avoids waiting for each AP to respond to the startup IPI in
+ * CPUHP_BRINGUP_CPU.
+ */
+static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
+{
+	const struct cpumask *mask = cpu_present_mask;
+
+	if (__cpuhp_parallel_bringup)
+		__cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup();
+	if (!__cpuhp_parallel_bringup)
+		return false;
+
+	if (cpuhp_smt_aware()) {
+		const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
+		static struct cpumask tmp_mask __initdata;
+
+		/*
+		 * X86 requires that the SMT siblings are not brought up while
+		 * the primary thread does a microcode update, for various
+		 * reasons. Bring the primary threads up first.
+		 */
+		cpumask_and(&tmp_mask, mask, pmask);
+		cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP);
+		cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE);
+		/* Account for the online CPUs */
+		ncpus -= num_online_cpus();
+		if (!ncpus)
+			return true;
+		/* Create the mask for secondary CPUs */
+		cpumask_andnot(&tmp_mask, mask, pmask);
+		mask = &tmp_mask;
+	}
+
+	/* Bring the not-yet started CPUs up */
+	cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP);
+	cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE);
+	return true;
+}
+#else
+static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; }
+#endif /* CONFIG_HOTPLUG_PARALLEL */
+
+void __init bringup_nonboot_cpus(unsigned int setup_max_cpus)
+{
+	/* Try parallel bringup optimization if enabled */
+	if (cpuhp_bringup_cpus_parallel(setup_max_cpus))
+		return;
+
+	/* Full per CPU serialized bringup */
+	cpuhp_bringup_mask(cpu_present_mask, setup_max_cpus, CPUHP_ONLINE);
+}
+
 #ifdef CONFIG_PM_SLEEP_SMP
 static cpumask_var_t frozen_cpus;
@@ -1740,13 +2078,38 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.startup.single		= timers_prepare_cpu,
 		.teardown.single	= timers_dead_cpu,
 	},
-	/* Kicks the plugged cpu into life */
+
+#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
+	/*
+	 * Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until
+	 * the next step will release it.
+	 */
+	[CPUHP_BP_KICK_AP] = {
+		.name			= "cpu:kick_ap",
+		.startup.single		= cpuhp_kick_ap_alive,
+	},
+
+	/*
+	 * Waits for the AP to reach cpuhp_ap_sync_alive() and then
+	 * releases it for the complete bringup.
+	 */
+	[CPUHP_BRINGUP_CPU] = {
+		.name			= "cpu:bringup",
+		.startup.single		= cpuhp_bringup_ap,
+		.teardown.single	= finish_cpu,
+		.cant_stop		= true,
+	},
+#else
+	/*
+	 * All-in-one CPU bringup state which includes the kick alive.
+	 */
 	[CPUHP_BRINGUP_CPU] = {
 		.name			= "cpu:bringup",
 		.startup.single		= bringup_cpu,
 		.teardown.single	= finish_cpu,
 		.cant_stop		= true,
 	},
+#endif
 	/* Final state before CPU kills itself */
 	[CPUHP_AP_IDLE_DEAD] = {
 		.name			= "idle:dead",
@@ -2723,6 +3086,7 @@ void __init boot_cpu_hotplug_init(void)
 {
 #ifdef CONFIG_SMP
 	cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
+	atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE);
 #endif
 	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
 	this_cpu_write(cpuhp_state.target, CPUHP_ONLINE);
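For orientation, the sketch below shows how an architecture opting into the split startup and core sync hooks introduced by this patch might plug into them. Only the cpuhp_*() and arch_cpuhp_*() names come from the patch itself; everything prefixed arch_send_, arch_init_ or arch_cpu_park_, and the booting_idle_task variable, are hypothetical placeholders for existing arch-specific code, so treat this as an illustration rather than real architecture support.

/*
 * Illustrative sketch only, not part of the patch: wiring an architecture
 * into CONFIG_HOTPLUG_SPLIT_STARTUP / CONFIG_HOTPLUG_CORE_SYNC_FULL /
 * CONFIG_HOTPLUG_CORE_SYNC_DEAD. The arch_send_startup_ipi(),
 * arch_init_this_cpu() and arch_cpu_park_self() helpers plus
 * booting_idle_task are hypothetical placeholders.
 */
#include <linux/cpu.h>
#include <linux/cpuhotplug.h>
#include <linux/percpu.h>
#include <linux/sched.h>

static DEFINE_PER_CPU(struct task_struct *, booting_idle_task);	/* placeholder */

/* BP side: invoked by the CPUHP_BP_KICK_AP state via cpuhp_kick_ap_alive(). */
int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
{
	/* Remember the idle task and fire the startup IPI (placeholder). */
	per_cpu(booting_idle_task, cpu) = tidle;
	return arch_send_startup_ipi(cpu);
}

/* AP side: early secondary entry point (placeholder name). */
void arch_secondary_start(void)
{
	arch_init_this_cpu();				/* placeholder */

	/*
	 * Report SYNC_STATE_ALIVE and spin until the control CPU flips the
	 * state to SYNC_STATE_SHOULD_ONLINE from cpuhp_bp_sync_alive().
	 */
	cpuhp_ap_sync_alive();

	/* Continue with the regular hotplug state machine. */
	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

/* AP side: last step of the offline path with HOTPLUG_CORE_SYNC_DEAD. */
void arch_cpu_park_self(void)				/* placeholder */
{
	/* Let cpuhp_bp_sync_dead() on the control CPU stop waiting ... */
	cpuhp_ap_report_dead();
	/* ... then park this CPU (wfi/hlt loop, firmware call, etc.). */
	for (;;)
		cpu_relax();
}

Besides hooks along these lines, an architecture opting in also has to select the corresponding HOTPLUG_* Kconfig switches; architectures that stay on the all-in-one bringup_cpu() path in the #else branch above need none of this.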