Diffstat (limited to 'kernel/stop_machine.c')
| -rw-r--r-- | kernel/stop_machine.c | 537 |
1 file changed, 412 insertions, 125 deletions
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 9bb9fb1bd79c..b4e7431e7c78 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -1,17 +1,384 @@
-/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
- * GPL v2 and any later version.
+/*
+ * kernel/stop_machine.c
+ *
+ * Copyright (C) 2008, 2005	IBM Corporation.
+ * Copyright (C) 2008, 2005	Rusty Russell rusty@rustcorp.com.au
+ * Copyright (C) 2010		SUSE Linux Products GmbH
+ * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2 and any later version.
  */
+#include <linux/completion.h>
 #include <linux/cpu.h>
-#include <linux/err.h>
+#include <linux/init.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/stop_machine.h>
-#include <linux/syscalls.h>
 #include <linux/interrupt.h>
+#include <linux/kallsyms.h>
 #include <asm/atomic.h>
-#include <asm/uaccess.h>
+
+/*
+ * Structure to determine completion condition and record errors.  May
+ * be shared by works on different cpus.
+ */
+struct cpu_stop_done {
+	atomic_t		nr_todo;	/* nr left to execute */
+	bool			executed;	/* actually executed? */
+	int			ret;		/* collected return value */
+	struct completion	completion;	/* fired if nr_todo reaches 0 */
+};
+
+/* the actual stopper, one per every possible cpu, enabled on online cpus */
+struct cpu_stopper {
+	spinlock_t		lock;
+	struct list_head	works;		/* list of pending works */
+	struct task_struct	*thread;	/* stopper thread */
+	bool			enabled;	/* is this stopper enabled? */
+};
+
+static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
+
+static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
+{
+	memset(done, 0, sizeof(*done));
+	atomic_set(&done->nr_todo, nr_todo);
+	init_completion(&done->completion);
+}
+
+/* signal completion unless @done is NULL */
+static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
+{
+	if (done) {
+		if (executed)
+			done->executed = true;
+		if (atomic_dec_and_test(&done->nr_todo))
+			complete(&done->completion);
+	}
+}
+
+/* queue @work to @stopper.  if offline, @work is completed immediately */
+static void cpu_stop_queue_work(struct cpu_stopper *stopper,
+				struct cpu_stop_work *work)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&stopper->lock, flags);
+
+	if (stopper->enabled) {
+		list_add_tail(&work->list, &stopper->works);
+		wake_up_process(stopper->thread);
+	} else
+		cpu_stop_signal_done(work->done, false);
+
+	spin_unlock_irqrestore(&stopper->lock, flags);
+}
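The cpu_stop_done above is what lets one waiter track works queued on several cpus: nr_todo is initialized to the number of works sharing the descriptor, and the last cpu_stop_signal_done() call fires the completion. A minimal sketch of the same counting idiom in plain C11 (fake_done and fake_signal_done are illustrative stand-ins, not kernel API):

    /* Stand-in for struct cpu_stop_done: the last decrement to reach
     * zero "completes" the waiter, mirroring atomic_dec_and_test(). */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct fake_done {
        atomic_int  nr_todo;    /* works still outstanding */
        atomic_bool complete;   /* stands in for struct completion */
    };

    static void fake_signal_done(struct fake_done *done)
    {
        /* fetch_sub returns the old value: old == 1 means we were last */
        if (atomic_fetch_sub(&done->nr_todo, 1) == 1)
            atomic_store(&done->complete, true);
    }

    int main(void)
    {
        struct fake_done done = { 3, false };

        for (int i = 0; i < 3; i++) {
            fake_signal_done(&done);
            printf("after signal %d: complete=%d\n", i + 1,
                   (int)atomic_load(&done.complete));
        }
        return 0;   /* prints 0, 0, 1: only the last signal completes */
    }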
+
+/**
+ * stop_one_cpu - stop a cpu
+ * @cpu: cpu to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
+ * the highest priority preempting any task on the cpu and
+ * monopolizing it.  This function returns after the execution is
+ * complete.
+ *
+ * This function doesn't guarantee @cpu stays online till @fn
+ * completes.  If @cpu goes down in the middle, execution may happen
+ * partially or fully on different cpus.  @fn should either be ready
+ * for that or the caller should ensure that @cpu stays online until
+ * this function completes.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
+ * otherwise, the return value of @fn.
+ */
+int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
+{
+	struct cpu_stop_done done;
+	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
+
+	cpu_stop_init_done(&done, 1);
+	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
+	wait_for_completion(&done.completion);
+	return done.executed ? done.ret : -ENOENT;
+}
+
+/**
+ * stop_one_cpu_nowait - stop a cpu but don't wait for completion
+ * @cpu: cpu to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Similar to stop_one_cpu() but doesn't wait for completion.  The
+ * caller is responsible for ensuring @work_buf is currently unused
+ * and will remain untouched until stopper starts executing @fn.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+			struct cpu_stop_work *work_buf)
+{
+	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
+	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
+}
+
+/* static data for stop_cpus */
+static DEFINE_MUTEX(stop_cpus_mutex);
+static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
+
+int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
+{
+	struct cpu_stop_work *work;
+	struct cpu_stop_done done;
+	unsigned int cpu;
+
+	/* initialize works and done */
+	for_each_cpu(cpu, cpumask) {
+		work = &per_cpu(stop_cpus_work, cpu);
+		work->fn = fn;
+		work->arg = arg;
+		work->done = &done;
+	}
+	cpu_stop_init_done(&done, cpumask_weight(cpumask));
+
+	/*
+	 * Disable preemption while queueing to avoid getting
+	 * preempted by a stopper which might wait for other stoppers
+	 * to enter @fn which can lead to deadlock.
+	 */
+	preempt_disable();
+	for_each_cpu(cpu, cpumask)
+		cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
+				    &per_cpu(stop_cpus_work, cpu));
+	preempt_enable();
+
+	wait_for_completion(&done.completion);
+	return done.executed ? done.ret : -ENOENT;
+}
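stop_one_cpu() is a one-work instance of the pattern: a done with nr_todo = 1, queued to the target cpu's stopper. A hedged usage sketch (read_cpu_id() and sample_cpu() are hypothetical names, not part of this patch; any real callback must be non-sleeping since it runs with preemption disabled):

    /* Hypothetical stop_one_cpu() caller: run a short, non-sleeping
     * callback with cpu @cpu all to itself. */
    static int read_cpu_id(void *arg)
    {
        int *out = arg;

        *out = smp_processor_id();  /* no other task can run here */
        return 0;
    }

    static int sample_cpu(unsigned int cpu)
    {
        int id = -1;
        int ret = stop_one_cpu(cpu, read_cpu_id, &id);

        /* -ENOENT means @cpu was offline and read_cpu_id() never ran */
        return ret ? ret : id;
    }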
+
+/**
+ * stop_cpus - stop multiple cpus
+ * @cpumask: cpus to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
+ * @fn is run in a process context with the highest priority
+ * preempting any task on the cpu and monopolizing it.  This function
+ * returns after all executions are complete.
+ *
+ * This function doesn't guarantee the cpus in @cpumask stay online
+ * till @fn completes.  If some cpus go down in the middle, execution
+ * on the cpu may happen partially or fully on different cpus.  @fn
+ * should either be ready for that or the caller should ensure that
+ * the cpus stay online until this function completes.
+ *
+ * All stop_cpus() calls are serialized making it safe for @fn to wait
+ * for all cpus to start executing it.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -ENOENT if @fn(@arg) was not executed at all because all cpus in
+ * @cpumask were offline; otherwise, 0 if all executions of @fn
+ * returned 0, any non zero return value if any returned non zero.
+ */
+int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
+{
+	int ret;
+
+	/* static works are used, process one request at a time */
+	mutex_lock(&stop_cpus_mutex);
+	ret = __stop_cpus(cpumask, fn, arg);
+	mutex_unlock(&stop_cpus_mutex);
+	return ret;
+}
+
+/**
+ * try_stop_cpus - try to stop multiple cpus
+ * @cpumask: cpus to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Identical to stop_cpus() except that it fails with -EAGAIN if
+ * someone else is already using the facility.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -EAGAIN if someone else is already stopping cpus, -ENOENT if
+ * @fn(@arg) was not executed at all because all cpus in @cpumask were
+ * offline; otherwise, 0 if all executions of @fn returned 0, any non
+ * zero return value if any returned non zero.
+ */
+int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
+{
+	int ret;
+
+	/* static works are used, process one request at a time */
+	if (!mutex_trylock(&stop_cpus_mutex))
+		return -EAGAIN;
+	ret = __stop_cpus(cpumask, fn, arg);
+	mutex_unlock(&stop_cpus_mutex);
+	return ret;
+}
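Because all multi-cpu requests share the static stop_cpus_work buffers, stop_cpus() callers queue behind stop_cpus_mutex, and try_stop_cpus() reports -EAGAIN instead of sleeping on it. A sketch of a hypothetical caller that backs off and retries (do_resync() and resync_all_cpus() are illustrative names):

    /* Retry until the stop_cpus facility is free.  do_resync() is a
     * placeholder callback; a real one must not sleep. */
    static int do_resync(void *unused)
    {
        return 0;
    }

    static int resync_all_cpus(void)
    {
        int ret;

        while ((ret = try_stop_cpus(cpu_online_mask,
                                    do_resync, NULL)) == -EAGAIN)
            cond_resched();  /* someone else is stopping cpus */

        return ret;
    }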
+
+static int cpu_stopper_thread(void *data)
+{
+	struct cpu_stopper *stopper = data;
+	struct cpu_stop_work *work;
+	int ret;
+
+repeat:
+	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
+
+	if (kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		return 0;
+	}
+
+	work = NULL;
+	spin_lock_irq(&stopper->lock);
+	if (!list_empty(&stopper->works)) {
+		work = list_first_entry(&stopper->works,
+					struct cpu_stop_work, list);
+		list_del_init(&work->list);
+	}
+	spin_unlock_irq(&stopper->lock);
+
+	if (work) {
+		cpu_stop_fn_t fn = work->fn;
+		void *arg = work->arg;
+		struct cpu_stop_done *done = work->done;
+		char ksym_buf[KSYM_NAME_LEN];
+
+		__set_current_state(TASK_RUNNING);
+
+		/* cpu stop callbacks are not allowed to sleep */
+		preempt_disable();
+
+		ret = fn(arg);
+		if (ret)
+			done->ret = ret;
+
+		/* restore preemption and check it's still balanced */
+		preempt_enable();
+		WARN_ONCE(preempt_count(),
+			  "cpu_stop: %s(%p) leaked preempt count\n",
+			  kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
+					  ksym_buf), arg);
+
+		cpu_stop_signal_done(done, true);
+	} else
+		schedule();
+
+	goto repeat;
+}
+
+/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
+static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
+					   unsigned long action, void *hcpu)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+	unsigned int cpu = (unsigned long)hcpu;
+	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+	struct task_struct *p;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		BUG_ON(stopper->thread || stopper->enabled ||
+		       !list_empty(&stopper->works));
+		p = kthread_create(cpu_stopper_thread, stopper, "migration/%d",
+				   cpu);
+		if (IS_ERR(p))
+			return NOTIFY_BAD;
+		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
+		get_task_struct(p);
+		stopper->thread = p;
+		break;
+
+	case CPU_ONLINE:
+		kthread_bind(stopper->thread, cpu);
+		/* strictly unnecessary, as first user will wake it */
+		wake_up_process(stopper->thread);
+		/* mark enabled */
+		spin_lock_irq(&stopper->lock);
+		stopper->enabled = true;
+		spin_unlock_irq(&stopper->lock);
+		break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+	{
+		struct cpu_stop_work *work;
+
+		/* kill the stopper */
+		kthread_stop(stopper->thread);
+		/* drain remaining works */
+		spin_lock_irq(&stopper->lock);
+		list_for_each_entry(work, &stopper->works, list)
+			cpu_stop_signal_done(work->done, false);
+		stopper->enabled = false;
+		spin_unlock_irq(&stopper->lock);
+		/* release the stopper */
+		put_task_struct(stopper->thread);
+		stopper->thread = NULL;
+		break;
+	}
+#endif
+	}
+
+	return NOTIFY_OK;
+}
+
+/*
+ * Give it a higher priority so that cpu stopper is available to other
+ * cpu notifiers.  It currently shares the same priority as sched
+ * migration_notifier.
+ */
+static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
+	.notifier_call	= cpu_stop_cpu_callback,
+	.priority	= 10,
+};
+
+static int __init cpu_stop_init(void)
+{
+	void *bcpu = (void *)(long)smp_processor_id();
+	unsigned int cpu;
+	int err;
+
+	for_each_possible_cpu(cpu) {
+		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+
+		spin_lock_init(&stopper->lock);
+		INIT_LIST_HEAD(&stopper->works);
+	}
+
+	/* start one for the boot cpu */
+	err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
+				    bcpu);
+	BUG_ON(err == NOTIFY_BAD);
+	cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
+	register_cpu_notifier(&cpu_stop_cpu_notifier);
+
+	return 0;
+}
+early_initcall(cpu_stop_init);
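The stopper thread brackets each dequeued work with preempt_disable()/preempt_enable(), so callbacks run atomically on their cpu and a leaked preempt count is caught by the WARN_ONCE. For fire-and-forget use there is stop_one_cpu_nowait(); one way a caller might keep the required long-lived work buffer is a per-cpu variable, mirroring how stop_cpus_work is handled above (kick_fn and kick_work are hypothetical names, and the caller must still guarantee the buffer is not queued from a previous use, as the kernel-doc demands):

    /* Per-cpu work buffer so the stopper can take the work after
     * stop_one_cpu_nowait() has already returned. */
    static DEFINE_PER_CPU(struct cpu_stop_work, kick_work);

    static int kick_fn(void *arg)
    {
        /* non-sleeping; the stopper already disabled preemption */
        return 0;
    }

    static void kick_cpu(unsigned int cpu)
    {
        /* returns immediately; kick_fn() runs on @cpu later */
        stop_one_cpu_nowait(cpu, kick_fn, NULL,
                            &per_cpu(kick_work, cpu));
    }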
+
+#ifdef CONFIG_STOP_MACHINE
 
 /* This controls the threads on each CPU. */
 enum stopmachine_state {
@@ -26,174 +393,94 @@ enum stopmachine_state {
 	/* Exit */
 	STOPMACHINE_EXIT,
 };
-static enum stopmachine_state state;
 
 struct stop_machine_data {
-	int (*fn)(void *);
-	void *data;
-	int fnret;
+	int			(*fn)(void *);
+	void			*data;
+	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+	unsigned int		num_threads;
+	const struct cpumask	*active_cpus;
+
+	enum stopmachine_state	state;
+	atomic_t		thread_ack;
 };
 
-/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
-static unsigned int num_threads;
-static atomic_t thread_ack;
-static DEFINE_MUTEX(lock);
-/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
-static DEFINE_MUTEX(setup_lock);
-/* Users of stop_machine. */
-static int refcount;
-static struct workqueue_struct *stop_machine_wq;
-static struct stop_machine_data active, idle;
-static const struct cpumask *active_cpus;
-static void __percpu *stop_machine_work;
-
-static void set_state(enum stopmachine_state newstate)
+static void set_state(struct stop_machine_data *smdata,
+		      enum stopmachine_state newstate)
 {
 	/* Reset ack counter. */
-	atomic_set(&thread_ack, num_threads);
+	atomic_set(&smdata->thread_ack, smdata->num_threads);
 	smp_wmb();
-	state = newstate;
+	smdata->state = newstate;
 }
 
 /* Last one to ack a state moves to the next state. */
-static void ack_state(void)
+static void ack_state(struct stop_machine_data *smdata)
 {
-	if (atomic_dec_and_test(&thread_ack))
-		set_state(state + 1);
+	if (atomic_dec_and_test(&smdata->thread_ack))
+		set_state(smdata, smdata->state + 1);
 }
 
-/* This is the actual function which stops the CPU. It runs
- * in the context of a dedicated stopmachine workqueue. */
-static void stop_cpu(struct work_struct *unused)
+/* This is the cpu_stop function which stops the CPU. */
+static int stop_machine_cpu_stop(void *data)
 {
+	struct stop_machine_data *smdata = data;
 	enum stopmachine_state curstate = STOPMACHINE_NONE;
-	struct stop_machine_data *smdata = &idle;
-	int cpu = smp_processor_id();
-	int err;
+	int cpu = smp_processor_id(), err = 0;
+	bool is_active;
+
+	if (!smdata->active_cpus)
+		is_active = cpu == cpumask_first(cpu_online_mask);
+	else
+		is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
 
-	if (!active_cpus) {
-		if (cpu == cpumask_first(cpu_online_mask))
-			smdata = &active;
-	} else {
-		if (cpumask_test_cpu(cpu, active_cpus))
-			smdata = &active;
-	}
 	/* Simple state machine */
 	do {
 		/* Chill out and ensure we re-read stopmachine_state. */
 		cpu_relax();
-		if (state != curstate) {
-			curstate = state;
+		if (smdata->state != curstate) {
+			curstate = smdata->state;
 			switch (curstate) {
 			case STOPMACHINE_DISABLE_IRQ:
 				local_irq_disable();
 				hard_irq_disable();
 				break;
 			case STOPMACHINE_RUN:
-				/* On multiple CPUs only a single error code
-				 * is needed to tell that something failed. */
-				err = smdata->fn(smdata->data);
-				if (err)
-					smdata->fnret = err;
+				if (is_active)
+					err = smdata->fn(smdata->data);
 				break;
 			default:
 				break;
 			}
-			ack_state();
+			ack_state(smdata);
 		}
 	} while (curstate != STOPMACHINE_EXIT);
 
 	local_irq_enable();
+	return err;
 }
 
-/* Callback for CPUs which aren't supposed to do anything. */
-static int chill(void *unused)
-{
-	return 0;
-}
-
-int stop_machine_create(void)
-{
-	mutex_lock(&setup_lock);
-	if (refcount)
-		goto done;
-	stop_machine_wq = create_rt_workqueue("kstop");
-	if (!stop_machine_wq)
-		goto err_out;
-	stop_machine_work = alloc_percpu(struct work_struct);
-	if (!stop_machine_work)
-		goto err_out;
-done:
-	refcount++;
-	mutex_unlock(&setup_lock);
-	return 0;
-
-err_out:
-	if (stop_machine_wq)
-		destroy_workqueue(stop_machine_wq);
-	mutex_unlock(&setup_lock);
-	return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(stop_machine_create);
-
-void stop_machine_destroy(void)
-{
-	mutex_lock(&setup_lock);
-	refcount--;
-	if (refcount)
-		goto done;
-	destroy_workqueue(stop_machine_wq);
-	free_percpu(stop_machine_work);
-done:
-	mutex_unlock(&setup_lock);
-}
-EXPORT_SYMBOL_GPL(stop_machine_destroy);
-
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
-	struct work_struct *sm_work;
-	int i, ret;
-
-	/* Set up initial state. */
-	mutex_lock(&lock);
-	num_threads = num_online_cpus();
-	active_cpus = cpus;
-	active.fn = fn;
-	active.data = data;
-	active.fnret = 0;
-	idle.fn = chill;
-	idle.data = NULL;
-
-	set_state(STOPMACHINE_PREPARE);
-
-	/* Schedule the stop_cpu work on all cpus: hold this CPU so one
-	 * doesn't hit this CPU until we're ready. */
-	get_cpu();
-	for_each_online_cpu(i) {
-		sm_work = per_cpu_ptr(stop_machine_work, i);
-		INIT_WORK(sm_work, stop_cpu);
-		queue_work_on(i, stop_machine_wq, sm_work);
-	}
-	/* This will release the thread on our CPU. */
-	put_cpu();
-	flush_workqueue(stop_machine_wq);
-	ret = active.fnret;
-	mutex_unlock(&lock);
-	return ret;
+	struct stop_machine_data smdata = { .fn = fn, .data = data,
+					    .num_threads = num_online_cpus(),
+					    .active_cpus = cpus };
+
+	/* Set the initial state and stop all online cpus. */
+	set_state(&smdata, STOPMACHINE_PREPARE);
+	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
 }
 
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	int ret;
 
-	ret = stop_machine_create();
-	if (ret)
-		return ret;
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
 	ret = __stop_machine(fn, data, cpus);
 	put_online_cpus();
-	stop_machine_destroy();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(stop_machine);
+
+#endif	/* CONFIG_STOP_MACHINE */
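After the rewrite, stop_machine() is only get_online_cpus() around __stop_machine(), which itself is stop_cpus() with the DISABLE_IRQ/RUN state machine as the callback. A hedged usage sketch (struct patch and install_patch() are illustrative names, though the shape mirrors classic stop_machine() users such as code patching):

    /* Update a value while every online cpu spins in
     * stop_machine_cpu_stop() with interrupts disabled, so no cpu
     * can observe the update half-done. */
    struct patch {
        int *addr;
        int val;
    };

    static int apply_patch(void *data)
    {
        struct patch *p = data;

        *p->addr = p->val;  /* machine is stopped: no concurrent readers */
        return 0;
    }

    static int install_patch(int *addr, int val)
    {
        struct patch p = { .addr = addr, .val = val };

        /* NULL cpumask: apply_patch() runs on the first online cpu,
         * all other cpus just spin with irqs off */
        return stop_machine(apply_patch, &p, NULL);
    }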