Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--	kernel/workqueue.c	| 206
1 file changed, 122 insertions, 84 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 50142fc08902..33a6b4a2443d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -205,9 +205,26 @@ struct pool_workqueue {
 	int			refcnt;		/* L: reference count */
 	int			nr_in_flight[WORK_NR_COLORS];
 						/* L: nr of in_flight works */
+
+	/*
+	 * nr_active management and WORK_STRUCT_INACTIVE:
+	 *
+	 * When pwq->nr_active >= max_active, new work item is queued to
+	 * pwq->inactive_works instead of pool->worklist and marked with
+	 * WORK_STRUCT_INACTIVE.
+	 *
+	 * All work items marked with WORK_STRUCT_INACTIVE do not participate
+	 * in pwq->nr_active and all work items in pwq->inactive_works are
+	 * marked with WORK_STRUCT_INACTIVE.  But not all WORK_STRUCT_INACTIVE
+	 * work items are in pwq->inactive_works.  Some of them are ready to
+	 * run in pool->worklist or worker->scheduled.  Those work items are
+	 * only struct wq_barrier which is used for flush_work() and should
+	 * not participate in pwq->nr_active.  For non-barrier work item, it
+	 * is marked with WORK_STRUCT_INACTIVE iff it is in pwq->inactive_works.
+	 */
 	int			nr_active;	/* L: nr of active works */
 	int			max_active;	/* L: max active works */
-	struct list_head	delayed_works;	/* L: delayed works */
+	struct list_head	inactive_works;	/* L: inactive works */
 
 	struct list_head	pwqs_node;	/* WR: node on wq->pwqs */
 	struct list_head	mayday_node;	/* MD: node on wq->maydays */
@@ -524,7 +541,7 @@ static inline void debug_work_deactivate(struct work_struct *work) { }
 #endif
 
 /**
- * worker_pool_assign_id - allocate ID and assing it to @pool
+ * worker_pool_assign_id - allocate ID and assign it to @pool
  * @pool: the pool pointer of interest
  *
  * Returns 0 if ID in [0, WORK_OFFQ_POOL_NONE) is allocated and assigned
@@ -579,9 +596,9 @@ static unsigned int work_color_to_flags(int color)
 	return color << WORK_STRUCT_COLOR_SHIFT;
 }
 
-static int get_work_color(struct work_struct *work)
+static int get_work_color(unsigned long work_data)
 {
-	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
+	return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
 		((1 << WORK_STRUCT_COLOR_BITS) - 1);
 }
 
@@ -1136,7 +1153,7 @@ static void put_pwq_unlocked(struct pool_workqueue *pwq)
 	}
 }
 
-static void pwq_activate_delayed_work(struct work_struct *work)
+static void pwq_activate_inactive_work(struct work_struct *work)
 {
 	struct pool_workqueue *pwq = get_work_pwq(work);
 
@@ -1144,22 +1161,22 @@ static void pwq_activate_delayed_work(struct work_struct *work)
 	if (list_empty(&pwq->pool->worklist))
 		pwq->pool->watchdog_ts = jiffies;
 	move_linked_works(work, &pwq->pool->worklist, NULL);
-	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
+	__clear_bit(WORK_STRUCT_INACTIVE_BIT, work_data_bits(work));
 	pwq->nr_active++;
 }
 
-static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
+static void pwq_activate_first_inactive(struct pool_workqueue *pwq)
 {
-	struct work_struct *work = list_first_entry(&pwq->delayed_works,
+	struct work_struct *work = list_first_entry(&pwq->inactive_works,
 						    struct work_struct, entry);
 
-	pwq_activate_delayed_work(work);
+	pwq_activate_inactive_work(work);
 }
 
 /**
  * pwq_dec_nr_in_flight - decrement pwq's nr_in_flight
  * @pwq: pwq of interest
- * @color: color of work which left the queue
+ * @work_data: work_data of work which left the queue
  *
  * A work either has completed or is removed from pending queue,
  * decrement nr_in_flight of its pwq and handle workqueue flushing.
@@ -1167,21 +1184,21 @@ static void pwq_activate_first_delayed(struct pool_workqueue *pwq)
  * CONTEXT:
  * raw_spin_lock_irq(pool->lock).
  */
-static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, int color)
+static void pwq_dec_nr_in_flight(struct pool_workqueue *pwq, unsigned long work_data)
 {
-	/* uncolored work items don't participate in flushing or nr_active */
-	if (color == WORK_NO_COLOR)
-		goto out_put;
-
-	pwq->nr_in_flight[color]--;
+	int color = get_work_color(work_data);
 
-	pwq->nr_active--;
-	if (!list_empty(&pwq->delayed_works)) {
-		/* one down, submit a delayed one */
-		if (pwq->nr_active < pwq->max_active)
-			pwq_activate_first_delayed(pwq);
+	if (!(work_data & WORK_STRUCT_INACTIVE)) {
+		pwq->nr_active--;
+		if (!list_empty(&pwq->inactive_works)) {
+			/* one down, submit an inactive one */
+			if (pwq->nr_active < pwq->max_active)
+				pwq_activate_first_inactive(pwq);
+		}
 	}
 
+	pwq->nr_in_flight[color]--;
+
 	/* is flush in progress and are we at the flushing tip? */
 	if (likely(pwq->flush_color != color))
 		goto out_put;
@@ -1281,17 +1298,21 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 		debug_work_deactivate(work);
 
 		/*
-		 * A delayed work item cannot be grabbed directly because
-		 * it might have linked NO_COLOR work items which, if left
-		 * on the delayed_list, will confuse pwq->nr_active
+		 * A cancelable inactive work item must be in the
+		 * pwq->inactive_works since a queued barrier can't be
+		 * canceled (see the comments in insert_wq_barrier()).
+		 *
+		 * An inactive work item cannot be grabbed directly because
+		 * it might have linked barrier work items which, if left
+		 * on the inactive_works list, will confuse pwq->nr_active
 		 * management later on and cause stall.  Make sure the work
 		 * item is activated before grabbing.
 		 */
-		if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
-			pwq_activate_delayed_work(work);
+		if (*work_data_bits(work) & WORK_STRUCT_INACTIVE)
+			pwq_activate_inactive_work(work);
 
 		list_del_init(&work->entry);
-		pwq_dec_nr_in_flight(pwq, get_work_color(work));
+		pwq_dec_nr_in_flight(pwq, *work_data_bits(work));
 
 		/* work->data points to pwq iff queued, point to pool */
 		set_work_pool_and_keep_pending(work, pool->id);
@@ -1490,8 +1511,8 @@ retry:
 		if (list_empty(worklist))
 			pwq->pool->watchdog_ts = jiffies;
 	} else {
-		work_flags |= WORK_STRUCT_DELAYED;
-		worklist = &pwq->delayed_works;
+		work_flags |= WORK_STRUCT_INACTIVE;
+		worklist = &pwq->inactive_works;
 	}
 
 	debug_work_activate(work);
@@ -1912,14 +1933,14 @@ static void worker_detach_from_pool(struct worker *worker)
  */
 static struct worker *create_worker(struct worker_pool *pool)
 {
-	struct worker *worker = NULL;
-	int id = -1;
+	struct worker *worker;
+	int id;
 	char id_buf[16];
 
 	/* ID is needed to determine kthread name */
-	id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
+	id = ida_alloc(&pool->worker_ida, GFP_KERNEL);
 	if (id < 0)
-		goto fail;
+		return NULL;
 
 	worker = alloc_worker(pool->node);
 	if (!worker)
@@ -1954,8 +1975,7 @@ static struct worker *create_worker(struct worker_pool *pool)
 	return worker;
 
 fail:
-	if (id >= 0)
-		ida_simple_remove(&pool->worker_ida, id);
+	ida_free(&pool->worker_ida, id);
 	kfree(worker);
 	return NULL;
 }
@@ -2173,7 +2193,7 @@ __acquires(&pool->lock)
 	struct pool_workqueue *pwq = get_work_pwq(work);
 	struct worker_pool *pool = worker->pool;
 	bool cpu_intensive = pwq->wq->flags & WQ_CPU_INTENSIVE;
-	int work_color;
+	unsigned long work_data;
 	struct worker *collision;
 #ifdef CONFIG_LOCKDEP
 	/*
@@ -2209,7 +2229,8 @@ __acquires(&pool->lock)
 	worker->current_work = work;
 	worker->current_func = work->func;
 	worker->current_pwq = pwq;
-	work_color = get_work_color(work);
+	work_data = *work_data_bits(work);
+	worker->current_color = get_work_color(work_data);
 
 	/*
 	 * Record wq name for cmdline and debug reporting, may get
@@ -2315,7 +2336,8 @@ __acquires(&pool->lock)
 	worker->current_work = NULL;
 	worker->current_func = NULL;
 	worker->current_pwq = NULL;
-	pwq_dec_nr_in_flight(pwq, work_color);
+	worker->current_color = INT_MAX;
+	pwq_dec_nr_in_flight(pwq, work_data);
 }
 
 /**
@@ -2378,7 +2400,7 @@ woke_up:
 		set_pf_worker(false);
 
 		set_task_comm(worker->task, "kworker/dying");
-		ida_simple_remove(&pool->worker_ida, worker->id);
+		ida_free(&pool->worker_ida, worker->id);
 		worker_detach_from_pool(worker);
 		kfree(worker);
 		return 0;
@@ -2531,7 +2553,7 @@ repeat:
 			/*
 			 * The above execution of rescued work items could
 			 * have created more to rescue through
-			 * pwq_activate_first_delayed() or chained
+			 * pwq_activate_first_inactive() or chained
 			 * queueing.  Let's put @pwq back on mayday list so
 			 * that such back-to-back work items, which may be
 			 * being used to relieve memory pressure, don't
@@ -2658,8 +2680,9 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 			      struct wq_barrier *barr,
 			      struct work_struct *target, struct worker *worker)
 {
+	unsigned int work_flags = 0;
+	unsigned int work_color;
 	struct list_head *head;
-	unsigned int linked = 0;
 
 	/*
 	 * debugobject calls are safe here even with pool->lock locked
@@ -2674,24 +2697,31 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
 	barr->task = current;
 
+	/* The barrier work item does not participate in pwq->nr_active. */
+	work_flags |= WORK_STRUCT_INACTIVE;
+
 	/*
	 * If @target is currently being executed, schedule the
	 * barrier to the worker; otherwise, put it after @target.
	 */
-	if (worker)
+	if (worker) {
 		head = worker->scheduled.next;
-	else {
+		work_color = worker->current_color;
+	} else {
 		unsigned long *bits = work_data_bits(target);
 
 		head = target->entry.next;
 		/* there can already be other linked works, inherit and set */
-		linked = *bits & WORK_STRUCT_LINKED;
+		work_flags |= *bits & WORK_STRUCT_LINKED;
+		work_color = get_work_color(*bits);
 		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
 	}
 
+	pwq->nr_in_flight[work_color]++;
+	work_flags |= work_color_to_flags(work_color);
+
 	debug_work_activate(&barr->work);
-	insert_work(pwq, &barr->work, head,
-		    work_color_to_flags(WORK_NO_COLOR) | linked);
+	insert_work(pwq, &barr->work, head, work_flags);
 }
 
 /**
@@ -2957,7 +2987,7 @@ reflush:
 		bool drained;
 
 		raw_spin_lock_irq(&pwq->pool->lock);
-		drained = !pwq->nr_active && list_empty(&pwq->delayed_works);
+		drained = !pwq->nr_active && list_empty(&pwq->inactive_works);
 		raw_spin_unlock_irq(&pwq->pool->lock);
 
 		if (drained)
@@ -3293,7 +3323,7 @@ int schedule_on_each_cpu(work_func_t func)
 	if (!works)
 		return -ENOMEM;
 
-	get_online_cpus();
+	cpus_read_lock();
 
 	for_each_online_cpu(cpu) {
 		struct work_struct *work = per_cpu_ptr(works, cpu);
@@ -3305,7 +3335,7 @@ int schedule_on_each_cpu(work_func_t func)
 	for_each_online_cpu(cpu)
 		flush_work(per_cpu_ptr(works, cpu));
 
-	put_online_cpus();
+	cpus_read_unlock();
 	free_percpu(works);
 	return 0;
 }
@@ -3676,15 +3706,21 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
 						  unbound_release_work);
 	struct workqueue_struct *wq = pwq->wq;
 	struct worker_pool *pool = pwq->pool;
-	bool is_last;
+	bool is_last = false;
 
-	if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
-		return;
+	/*
+	 * when @pwq is not linked, it doesn't hold any reference to the
+	 * @wq, and @wq is invalid to access.
+	 */
+	if (!list_empty(&pwq->pwqs_node)) {
+		if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
+			return;
 
-	mutex_lock(&wq->mutex);
-	list_del_rcu(&pwq->pwqs_node);
-	is_last = list_empty(&wq->pwqs);
-	mutex_unlock(&wq->mutex);
+		mutex_lock(&wq->mutex);
+		list_del_rcu(&pwq->pwqs_node);
+		is_last = list_empty(&wq->pwqs);
+		mutex_unlock(&wq->mutex);
+	}
 
 	mutex_lock(&wq_pool_mutex);
 	put_unbound_pool(pool);
@@ -3707,7 +3743,7 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
  * @pwq: target pool_workqueue
  *
  * If @pwq isn't freezing, set @pwq->max_active to the associated
- * workqueue's saved_max_active and activate delayed work items
+ * workqueue's saved_max_active and activate inactive work items
  * accordingly.  If @pwq is freezing, clear @pwq->max_active to zero.
  */
 static void pwq_adjust_max_active(struct pool_workqueue *pwq)
@@ -3736,9 +3772,9 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
 
 		pwq->max_active = wq->saved_max_active;
 
-		while (!list_empty(&pwq->delayed_works) &&
+		while (!list_empty(&pwq->inactive_works) &&
 		       pwq->nr_active < pwq->max_active) {
-			pwq_activate_first_delayed(pwq);
+			pwq_activate_first_inactive(pwq);
 			kick = true;
 		}
 
@@ -3757,7 +3793,7 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
 	raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
 }
 
-/* initialize newly alloced @pwq which is associated with @wq and @pool */
+/* initialize newly allocated @pwq which is associated with @wq and @pool */
 static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
 		     struct worker_pool *pool)
 {
@@ -3769,7 +3805,7 @@ static void init_pwq(struct pool_workqueue *pwq, struct workqueue_struct *wq,
 	pwq->wq = wq;
 	pwq->flush_color = -1;
 	pwq->refcnt = 1;
-	INIT_LIST_HEAD(&pwq->delayed_works);
+	INIT_LIST_HEAD(&pwq->inactive_works);
 	INIT_LIST_HEAD(&pwq->pwqs_node);
 	INIT_LIST_HEAD(&pwq->mayday_node);
 	INIT_WORK(&pwq->unbound_release_work, pwq_unbound_release_workfn);
@@ -4010,14 +4046,14 @@ static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
 static void apply_wqattrs_lock(void)
 {
 	/* CPUs should stay stable across pwq creations and installations */
-	get_online_cpus();
+	cpus_read_lock();
 	mutex_lock(&wq_pool_mutex);
 }
 
 static void apply_wqattrs_unlock(void)
 {
 	mutex_unlock(&wq_pool_mutex);
-	put_online_cpus();
+	cpus_read_unlock();
 }
 
 static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
@@ -4062,7 +4098,7 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
  *
  * Performs GFP_KERNEL allocations.
  *
- * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
+ * Assumes caller has CPU hotplug read exclusion, i.e. cpus_read_lock().
 *
 * Return: 0 on success and -errno on failure.
 */
@@ -4190,7 +4226,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
 		return 0;
 	}
 
-	get_online_cpus();
+	cpus_read_lock();
 	if (wq->flags & __WQ_ORDERED) {
 		ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
 		/* there should only be single pwq for ordering guarantee */
@@ -4200,7 +4236,7 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
 	} else {
 		ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 
 	return ret;
 }
@@ -4356,7 +4392,7 @@ static bool pwq_busy(struct pool_workqueue *pwq)
 	if ((pwq != pwq->wq->dfl_pwq) && (pwq->refcnt > 1))
 		return true;
-	if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+	if (pwq->nr_active || !list_empty(&pwq->inactive_works))
 		return true;
 
 	return false;
@@ -4552,7 +4588,7 @@ bool workqueue_congested(int cpu, struct workqueue_struct *wq)
 	else
 		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
 
-	ret = !list_empty(&pwq->delayed_works);
+	ret = !list_empty(&pwq->inactive_works);
 	preempt_enable();
 	rcu_read_unlock();
@@ -4748,11 +4784,11 @@ static void show_pwq(struct pool_workqueue *pwq)
 		pr_cont("\n");
 	}
 
-	if (!list_empty(&pwq->delayed_works)) {
+	if (!list_empty(&pwq->inactive_works)) {
 		bool comma = false;
 
-		pr_info("    delayed:");
-		list_for_each_entry(work, &pwq->delayed_works, entry) {
+		pr_info("    inactive:");
+		list_for_each_entry(work, &pwq->inactive_works, entry) {
 			pr_cont_work(comma, work);
 			comma = !(*work_data_bits(work) & WORK_STRUCT_LINKED);
 		}
@@ -4782,7 +4818,7 @@ void show_workqueue_state(void)
 		bool idle = true;
 
 		for_each_pwq(pwq, wq) {
-			if (pwq->nr_active || !list_empty(&pwq->delayed_works)) {
+			if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
 				idle = false;
 				break;
 			}
@@ -4794,7 +4830,7 @@ void show_workqueue_state(void)
 
 		for_each_pwq(pwq, wq) {
 			raw_spin_lock_irqsave(&pwq->pool->lock, flags);
-			if (pwq->nr_active || !list_empty(&pwq->delayed_works))
+			if (pwq->nr_active || !list_empty(&pwq->inactive_works))
 				show_pwq(pwq);
 			raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
 			/*
@@ -5162,10 +5198,10 @@ long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg)
 {
 	long ret = -ENODEV;
 
-	get_online_cpus();
+	cpus_read_lock();
 	if (cpu_online(cpu))
 		ret = work_on_cpu(cpu, fn, arg);
-	put_online_cpus();
+	cpus_read_unlock();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(work_on_cpu_safe);
@@ -5177,7 +5213,7 @@ EXPORT_SYMBOL_GPL(work_on_cpu_safe);
 * freeze_workqueues_begin - begin freezing workqueues
 *
 * Start freezing workqueues.  After this function returns, all freezable
- * workqueues will queue new works to their delayed_works list instead of
+ * workqueues will queue new works to their inactive_works list instead of
 * pool->worklist.
 *
 * CONTEXT:
@@ -5325,7 +5361,7 @@ static int workqueue_apply_unbound_cpumask(void)
 *  the affinity of all unbound workqueues.  This function check the @cpumask
 *  and apply it to all unbound workqueues and updates all pwqs of them.
 *
- *  Retun:	0	- Success
+ *  Return:	0	- Success
 *  		-EINVAL	- Invalid @cpumask
 *  		-ENOMEM	- Failed to allocate memory for attrs or pwqs.
 */
@@ -5437,7 +5473,7 @@ static ssize_t wq_pool_ids_show(struct device *dev,
 	const char *delim = "";
 	int node, written = 0;
 
-	get_online_cpus();
+	cpus_read_lock();
 	rcu_read_lock();
 	for_each_node(node) {
 		written += scnprintf(buf + written, PAGE_SIZE - written,
@@ -5447,7 +5483,7 @@ static ssize_t wq_pool_ids_show(struct device *dev,
 	}
 	written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
 	rcu_read_unlock();
-	put_online_cpus();
+	cpus_read_unlock();
 
 	return written;
 }
@@ -5896,6 +5932,13 @@ static void __init wq_numa_init(void)
 		return;
 	}
 
+	for_each_possible_cpu(cpu) {
+		if (WARN_ON(cpu_to_node(cpu) == NUMA_NO_NODE)) {
+			pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
+			return;
+		}
+	}
+
 	wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs();
 	BUG_ON(!wq_update_unbound_numa_attrs_buf);
 
@@ -5913,11 +5956,6 @@ static void __init wq_numa_init(void)
 
 	for_each_possible_cpu(cpu) {
 		node = cpu_to_node(cpu);
-		if (WARN_ON(node == NUMA_NO_NODE)) {
-			pr_warn("workqueue: NUMA node mapping not available for cpu%d, disabling NUMA support\n", cpu);
-			/* happens iff arch is bonkers, let's just proceed */
-			return;
-		}
 		cpumask_set_cpu(cpu, tbl[node]);
 	}
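
The change that ties most of these hunks together is that a flush barrier no longer uses WORK_NO_COLOR: insert_wq_barrier() now copies the flush color of the work item it follows and marks the barrier WORK_STRUCT_INACTIVE, so the barrier is counted in nr_in_flight[] for flushing but never in pwq->nr_active. The stand-alone sketch below illustrates that encoding of work_data; the constant values and the main() driver are assumptions made up for this example, not the kernel's real definitions from include/linux/workqueue.h.

#include <assert.h>
#include <stdio.h>

/* Illustrative values only -- the real bit layout is config dependent. */
#define WORK_STRUCT_COLOR_SHIFT	4		/* assumed position of the color field */
#define WORK_STRUCT_COLOR_BITS	4		/* assumed width of the color field */
#define WORK_STRUCT_INACTIVE	(1UL << 1)	/* stand-in for the real inactive bit */

/* Mirrors get_work_color() after the patch: the color comes from work_data. */
static int get_work_color(unsigned long work_data)
{
	return (work_data >> WORK_STRUCT_COLOR_SHIFT) &
		((1 << WORK_STRUCT_COLOR_BITS) - 1);
}

/* Mirrors work_color_to_flags(). */
static unsigned int work_color_to_flags(int color)
{
	return color << WORK_STRUCT_COLOR_SHIFT;
}

int main(void)
{
	/* A queued work item whose flush color happens to be 5. */
	unsigned long target_data = work_color_to_flags(5);
	unsigned long barrier_data;

	/*
	 * A barrier queued behind it inherits the target's color, so it is
	 * accounted in nr_in_flight[5] for flushing, and is flagged
	 * WORK_STRUCT_INACTIVE so it never counts toward pwq->nr_active.
	 */
	barrier_data = work_color_to_flags(get_work_color(target_data));
	barrier_data |= WORK_STRUCT_INACTIVE;

	assert(get_work_color(barrier_data) == 5);
	assert(barrier_data & WORK_STRUCT_INACTIVE);
	printf("barrier color=%d inactive=%d\n",
	       get_work_color(barrier_data),
	       (barrier_data & WORK_STRUCT_INACTIVE) ? 1 : 0);
	return 0;
}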
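The other half of the series is the rename from "delayed" to "inactive" and the reworked pwq_dec_nr_in_flight(): a work item parked because nr_active has reached max_active is flagged WORK_STRUCT_INACTIVE, and only items that were actually counted as active release a slot (and may promote the first parked item) when they finish; nr_in_flight[] is decremented for every item, barrier or not. The toy model below sketches that bookkeeping under simplified assumptions; struct toy_pwq and its helpers are invented for illustration and are not the kernel's pool_workqueue.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for the nr_active / inactive_works bookkeeping. */
struct toy_pwq {
	int nr_active;		/* items on the pool's worklist */
	int max_active;		/* saturation limit */
	int nr_inactive;	/* length of the would-be pwq->inactive_works list */
};

/* Queue path: over the limit, the item is parked as WORK_STRUCT_INACTIVE. */
static void toy_queue_work(struct toy_pwq *pwq)
{
	if (pwq->nr_active < pwq->max_active)
		pwq->nr_active++;	/* goes to pool->worklist */
	else
		pwq->nr_inactive++;	/* goes to pwq->inactive_works */
}

/*
 * Completion path, following the reworked pwq_dec_nr_in_flight(): only
 * items that were counted as active release a slot and may promote the
 * first inactive item; inactive (barrier) items skip this block.
 */
static void toy_work_done(struct toy_pwq *pwq, bool was_inactive)
{
	if (!was_inactive) {
		pwq->nr_active--;
		if (pwq->nr_inactive && pwq->nr_active < pwq->max_active) {
			pwq->nr_inactive--;	/* pwq_activate_first_inactive() */
			pwq->nr_active++;
		}
	}
	/* nr_in_flight[color]-- would happen here for every item */
}

int main(void)
{
	struct toy_pwq pwq = { .nr_active = 0, .max_active = 1, .nr_inactive = 0 };

	toy_queue_work(&pwq);		/* first item becomes active */
	toy_queue_work(&pwq);		/* second item is parked */
	printf("active=%d inactive=%d\n", pwq.nr_active, pwq.nr_inactive);

	toy_work_done(&pwq, false);	/* first finishes, second is activated */
	printf("active=%d inactive=%d\n", pwq.nr_active, pwq.nr_inactive);
	return 0;
}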