Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--	kernel/workqueue.c	206
1 file changed, 64 insertions, 142 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 35974ac69600..5dbe22aa3efd 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -265,7 +265,6 @@ struct workqueue_struct {
 
 static struct kmem_cache *pwq_cache;
 
-static int wq_numa_tbl_len;		/* highest possible NUMA node id + 1 */
 static cpumask_var_t *wq_numa_possible_cpumask;
 					/* possible CPUs of each node */
 
@@ -758,13 +757,6 @@ static bool too_many_workers(struct worker_pool *pool)
 	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 	int nr_busy = pool->nr_workers - nr_idle;
 
-	/*
-	 * nr_idle and idle_list may disagree if idle rebinding is in
-	 * progress.  Never return %true if idle_list is empty.
-	 */
-	if (list_empty(&pool->idle_list))
-		return false;
-
 	return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
 }
 
@@ -850,7 +842,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
 	pool = worker->pool;
 
 	/* this can only happen on the local cpu */
-	if (WARN_ON_ONCE(cpu != raw_smp_processor_id()))
+	if (WARN_ON_ONCE(cpu != raw_smp_processor_id() || pool->cpu != cpu))
 		return NULL;
 
 	/*
@@ -874,35 +866,22 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
  * worker_set_flags - set worker flags and adjust nr_running accordingly
  * @worker: self
  * @flags: flags to set
- * @wakeup: wakeup an idle worker if necessary
  *
- * Set @flags in @worker->flags and adjust nr_running accordingly.  If
- * nr_running becomes zero and @wakeup is %true, an idle worker is
- * woken up.
+ * Set @flags in @worker->flags and adjust nr_running accordingly.
 *
  * CONTEXT:
  * spin_lock_irq(pool->lock)
  */
-static inline void worker_set_flags(struct worker *worker, unsigned int flags,
-				    bool wakeup)
+static inline void worker_set_flags(struct worker *worker, unsigned int flags)
 {
 	struct worker_pool *pool = worker->pool;
 
 	WARN_ON_ONCE(worker->task != current);
 
-	/*
-	 * If transitioning into NOT_RUNNING, adjust nr_running and
-	 * wake up an idle worker as necessary if requested by
-	 * @wakeup.
-	 */
+	/* If transitioning into NOT_RUNNING, adjust nr_running. */
 	if ((flags & WORKER_NOT_RUNNING) &&
 	    !(worker->flags & WORKER_NOT_RUNNING)) {
-		if (wakeup) {
-			if (atomic_dec_and_test(&pool->nr_running) &&
-			    !list_empty(&pool->worklist))
-				wake_up_worker(pool);
-		} else
-			atomic_dec(&pool->nr_running);
+		atomic_dec(&pool->nr_running);
 	}
 
 	worker->flags |= flags;
@@ -1232,7 +1211,7 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
 			pwq_activate_delayed_work(work);
 
 		list_del_init(&work->entry);
-		pwq_dec_nr_in_flight(get_work_pwq(work), get_work_color(work));
+		pwq_dec_nr_in_flight(pwq, get_work_color(work));
 
 		/* work->data points to pwq iff queued, point to pool */
 		set_work_pool_and_keep_pending(work, pool->id);
@@ -1560,7 +1539,7 @@ static void worker_enter_idle(struct worker *worker)
 			 (worker->hentry.next || worker->hentry.pprev)))
 		return;
 
-	/* can't use worker_set_flags(), also called from start_worker() */
+	/* can't use worker_set_flags(), also called from create_worker() */
 	worker->flags |= WORKER_IDLE;
 	pool->nr_idle++;
 	worker->last_active = jiffies;
@@ -1602,11 +1581,11 @@ static void worker_leave_idle(struct worker *worker)
 	list_del_init(&worker->entry);
 }
 
-static struct worker *alloc_worker(void)
+static struct worker *alloc_worker(int node)
 {
 	struct worker *worker;
 
-	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
+	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, node);
 	if (worker) {
 		INIT_LIST_HEAD(&worker->entry);
 		INIT_LIST_HEAD(&worker->scheduled);
@@ -1670,6 +1649,9 @@ static void worker_detach_from_pool(struct worker *worker,
 		detach_completion = pool->detach_completion;
 	mutex_unlock(&pool->attach_mutex);
 
+	/* clear leftover flags without pool->lock after it is detached */
+	worker->flags &= ~(WORKER_UNBOUND | WORKER_REBOUND);
+
 	if (detach_completion)
 		complete(detach_completion);
 }
@@ -1678,8 +1660,7 @@ static void worker_detach_from_pool(struct worker *worker,
  * create_worker - create a new workqueue worker
  * @pool: pool the new worker will belong to
 *
- * Create a new worker which is attached to @pool.  The new worker must be
- * started by start_worker().
+ * Create and start a new worker which is attached to @pool.
 *
  * CONTEXT:
  * Might sleep.  Does GFP_KERNEL allocations.
@@ -1698,7 +1679,7 @@ static struct worker *create_worker(struct worker_pool *pool)
 	if (id < 0)
 		goto fail;
 
-	worker = alloc_worker();
+	worker = alloc_worker(pool->node);
 	if (!worker)
 		goto fail;
 
@@ -1724,6 +1705,13 @@ static struct worker *create_worker(struct worker_pool *pool)
 	/* successful, attach the worker to the pool */
 	worker_attach_to_pool(worker, pool);
 
+	/* start the newly created worker */
+	spin_lock_irq(&pool->lock);
+	worker->pool->nr_workers++;
+	worker_enter_idle(worker);
+	wake_up_process(worker->task);
+	spin_unlock_irq(&pool->lock);
+
 	return worker;
 
 fail:
@@ -1734,44 +1722,6 @@ fail:
 }
 
 /**
- * start_worker - start a newly created worker
- * @worker: worker to start
- *
- * Make the pool aware of @worker and start it.
- *
- * CONTEXT:
- * spin_lock_irq(pool->lock).
- */
-static void start_worker(struct worker *worker)
-{
-	worker->pool->nr_workers++;
-	worker_enter_idle(worker);
-	wake_up_process(worker->task);
-}
-
-/**
- * create_and_start_worker - create and start a worker for a pool
- * @pool: the target pool
- *
- * Grab the managership of @pool and create and start a new worker for it.
- *
- * Return: 0 on success. A negative error code otherwise.
- */
-static int create_and_start_worker(struct worker_pool *pool)
-{
-	struct worker *worker;
-
-	worker = create_worker(pool);
-	if (worker) {
-		spin_lock_irq(&pool->lock);
-		start_worker(worker);
-		spin_unlock_irq(&pool->lock);
-	}
-
-	return worker ? 0 : -ENOMEM;
-}
-
-/**
  * destroy_worker - destroy a workqueue worker
  * @worker: worker to be destroyed
 *
@@ -1909,23 +1859,10 @@ restart:
 	mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT);
 
 	while (true) {
-		struct worker *worker;
-
-		worker = create_worker(pool);
-		if (worker) {
-			del_timer_sync(&pool->mayday_timer);
-			spin_lock_irq(&pool->lock);
-			start_worker(worker);
-			if (WARN_ON_ONCE(need_to_create_worker(pool)))
-				goto restart;
-			return true;
-		}
-
-		if (!need_to_create_worker(pool))
+		if (create_worker(pool) || !need_to_create_worker(pool))
 			break;
 
-		__set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(CREATE_COOLDOWN);
+		schedule_timeout_interruptible(CREATE_COOLDOWN);
 
 		if (!need_to_create_worker(pool))
 			break;
@@ -1933,6 +1870,11 @@ restart:
 
 	del_timer_sync(&pool->mayday_timer);
 	spin_lock_irq(&pool->lock);
+	/*
+	 * This is necessary even after a new worker was just successfully
+	 * created as @pool->lock was dropped and the new worker might have
+	 * already become busy.
+	 */
 	if (need_to_create_worker(pool))
 		goto restart;
 	return true;
@@ -2020,13 +1962,8 @@ __acquires(&pool->lock)
 
 	lockdep_copy_map(&lockdep_map, &work->lockdep_map);
 #endif
-	/*
-	 * Ensure we're on the correct CPU.  DISASSOCIATED test is
-	 * necessary to avoid spurious warnings from rescuers servicing the
-	 * unbound or a disassociated pool.
-	 */
-	WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) &&
-		     !(pool->flags & POOL_DISASSOCIATED) &&
+	/* ensure we're on the correct CPU */
+	WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) &&
 		     raw_smp_processor_id() != pool->cpu);
 
 	/*
@@ -2052,17 +1989,22 @@ __acquires(&pool->lock)
 	list_del_init(&work->entry);
 
 	/*
-	 * CPU intensive works don't participate in concurrency
-	 * management.  They're the scheduler's responsibility.
+	 * CPU intensive works don't participate in concurrency management.
+	 * They're the scheduler's responsibility.  This takes @worker out
+	 * of concurrency management and the next code block will chain
+	 * execution of the pending work items.
 	 */
 	if (unlikely(cpu_intensive))
-		worker_set_flags(worker, WORKER_CPU_INTENSIVE, true);
+		worker_set_flags(worker, WORKER_CPU_INTENSIVE);
 
 	/*
-	 * Unbound pool isn't concurrency managed and work items should be
-	 * executed ASAP.  Wake up another worker if necessary.
+	 * Wake up another worker if necessary.  The condition is always
+	 * false for normal per-cpu workers since nr_running would always
+	 * be >= 1 at this point.  This is used to chain execution of the
+	 * pending work items for WORKER_NOT_RUNNING workers such as the
+	 * UNBOUND and CPU_INTENSIVE ones.
 	 */
-	if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool))
+	if (need_more_worker(pool))
 		wake_up_worker(pool);
 
 	/*
@@ -2218,7 +2160,7 @@ recheck:
 		}
 	} while (keep_working(pool));
 
-	worker_set_flags(worker, WORKER_PREP, false);
+	worker_set_flags(worker, WORKER_PREP);
 sleep:
 	/*
 	 * pool->lock is held and there's no work to process and no need to
@@ -2311,29 +2253,27 @@ repeat:
 				move_linked_works(work, scheduled, &n);
 
 		process_scheduled_works(rescuer);
-		spin_unlock_irq(&pool->lock);
-
-		worker_detach_from_pool(rescuer, pool);
-
-		spin_lock_irq(&pool->lock);
 
 		/*
 		 * Put the reference grabbed by send_mayday().  @pool won't
-		 * go away while we're holding its lock.
+		 * go away while we're still attached to it.
 		 */
 		put_pwq(pwq);
 
 		/*
-		 * Leave this pool.  If keep_working() is %true, notify a
+		 * Leave this pool.  If need_more_worker() is %true, notify a
 		 * regular worker; otherwise, we end up with 0 concurrency
 		 * and stalling the execution.
 		 */
-		if (keep_working(pool))
+		if (need_more_worker(pool))
 			wake_up_worker(pool);
 
 		rescuer->pool = NULL;
-		spin_unlock(&pool->lock);
-		spin_lock(&wq_mayday_lock);
+		spin_unlock_irq(&pool->lock);
+
+		worker_detach_from_pool(rescuer, pool);
+
+		spin_lock_irq(&wq_mayday_lock);
 	}
 
 	spin_unlock_irq(&wq_mayday_lock);
@@ -3458,7 +3398,7 @@ static void put_unbound_pool(struct worker_pool *pool)
 		return;
 
 	/* sanity checks */
-	if (WARN_ON(!(pool->flags & POOL_DISASSOCIATED)) ||
+	if (WARN_ON(!(pool->cpu < 0)) ||
 	    WARN_ON(!list_empty(&pool->worklist)))
 		return;
 
@@ -3524,7 +3464,7 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
 	hash_for_each_possible(unbound_pool_hash, pool, hash_node, hash) {
 		if (wqattrs_equal(pool->attrs, attrs)) {
 			pool->refcnt++;
-			goto out_unlock;
+			return pool;
 		}
 	}
 
@@ -3557,12 +3497,12 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
 		goto fail;
 
 	/* create and start the initial worker */
-	if (create_and_start_worker(pool) < 0)
+	if (!create_worker(pool))
 		goto fail;
 
 	/* install */
 	hash_add(unbound_pool_hash, &pool->hash_node, hash);
-out_unlock:
+
 	return pool;
 fail:
 	if (pool)
@@ -3591,11 +3531,6 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
 	if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)))
 		return;
 
-	/*
-	 * Unlink @pwq.  Synchronization against wq->mutex isn't strictly
-	 * necessary on release but do it anyway.  It's easier to verify
-	 * and consistent with the linking path.
-	 */
 	mutex_lock(&wq->mutex);
 	list_del_rcu(&pwq->pwqs_node);
 	is_last = list_empty(&wq->pwqs);
@@ -3692,10 +3627,7 @@ static void link_pwq(struct pool_workqueue *pwq)
 	if (!list_empty(&pwq->pwqs_node))
 		return;
 
-	/*
-	 * Set the matching work_color.  This is synchronized with
-	 * wq->mutex to avoid confusing flush_workqueue().
-	 */
+	/* set the matching work_color */
 	pwq->work_color = wq->work_color;
 
 	/* sync max_active to the current setting */
@@ -3832,7 +3764,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
 	if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
 		return -EINVAL;
 
-	pwq_tbl = kzalloc(wq_numa_tbl_len * sizeof(pwq_tbl[0]), GFP_KERNEL);
+	pwq_tbl = kzalloc(nr_node_ids * sizeof(pwq_tbl[0]), GFP_KERNEL);
 	new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
 	tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
 	if (!pwq_tbl || !new_attrs || !tmp_attrs)
@@ -4080,7 +4012,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 
 	/* allocate wq and format name */
 	if (flags & WQ_UNBOUND)
-		tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
+		tbl_size = nr_node_ids * sizeof(wq->numa_pwq_tbl[0]);
 
 	wq = kzalloc(sizeof(*wq) + tbl_size, GFP_KERNEL);
 	if (!wq)
@@ -4122,7 +4054,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
 	if (flags & WQ_MEM_RECLAIM) {
 		struct worker *rescuer;
 
-		rescuer = alloc_worker();
+		rescuer = alloc_worker(NUMA_NO_NODE);
 		if (!rescuer)
 			goto err_destroy;
 
@@ -4470,8 +4402,6 @@ static void wq_unbind_fn(struct work_struct *work)
 	struct worker *worker;
 
 	for_each_cpu_worker_pool(pool, cpu) {
-		WARN_ON_ONCE(cpu != smp_processor_id());
-
 		mutex_lock(&pool->attach_mutex);
 		spin_lock_irq(&pool->lock);
 
@@ -4543,6 +4473,7 @@ static void rebind_workers(struct worker_pool *pool)
 						  pool->attrs->cpumask) < 0);
 
 	spin_lock_irq(&pool->lock);
+	pool->flags &= ~POOL_DISASSOCIATED;
 
 	for_each_pool_worker(worker, pool) {
 		unsigned int worker_flags = worker->flags;
@@ -4632,7 +4563,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb,
 		for_each_cpu_worker_pool(pool, cpu) {
 			if (pool->nr_workers)
 				continue;
-			if (create_and_start_worker(pool) < 0)
+			if (!create_worker(pool))
 				return NOTIFY_BAD;
 		}
 		break;
@@ -4644,15 +4575,10 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb,
 		for_each_pool(pool, pi) {
 			mutex_lock(&pool->attach_mutex);
 
-			if (pool->cpu == cpu) {
-				spin_lock_irq(&pool->lock);
-				pool->flags &= ~POOL_DISASSOCIATED;
-				spin_unlock_irq(&pool->lock);
-
+			if (pool->cpu == cpu)
 				rebind_workers(pool);
-			} else if (pool->cpu < 0) {
+			else if (pool->cpu < 0)
 				restore_unbound_workers_cpumask(pool, cpu);
-			}
 
 			mutex_unlock(&pool->attach_mutex);
 		}
@@ -4856,10 +4782,6 @@ static void __init wq_numa_init(void)
 	cpumask_var_t *tbl;
 	int node, cpu;
 
-	/* determine NUMA pwq table len - highest node id + 1 */
-	for_each_node(node)
-		wq_numa_tbl_len = max(wq_numa_tbl_len, node + 1);
-
 	if (num_possible_nodes() <= 1)
 		return;
 
@@ -4876,7 +4798,7 @@ static void __init wq_numa_init(void)
 	 * available.  Build one from cpu_to_node() which should have been
 	 * fully initialized by now.
 	 */
-	tbl = kzalloc(wq_numa_tbl_len * sizeof(tbl[0]), GFP_KERNEL);
+	tbl = kzalloc(nr_node_ids * sizeof(tbl[0]), GFP_KERNEL);
 	BUG_ON(!tbl);
 
 	for_each_node(node)
@@ -4936,7 +4858,7 @@ static int __init init_workqueues(void)
 
 		for_each_cpu_worker_pool(pool, cpu) {
 			pool->flags &= ~POOL_DISASSOCIATED;
-			BUG_ON(create_and_start_worker(pool) < 0);
+			BUG_ON(!create_worker(pool));
 		}
 	}
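Note on the call sites above: the series folds start_worker() and create_and_start_worker() into create_worker(), which now attaches the worker, bumps nr_workers, enters idle and wakes the task itself, then returns the worker or NULL, so callers reduce to a NULL check. The user-space sketch below is only an illustrative analogy of that create-and-start pattern, not kernel code; the demo_* names are hypothetical.

/*
 * Analogy of the create-and-start pattern adopted by create_worker():
 * one call allocates the worker, starts its thread, and returns it,
 * so the caller only needs a NULL check.  Build with: cc demo.c -pthread
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_worker {
	pthread_t thread;
	int id;
};

static void *demo_worker_fn(void *arg)
{
	struct demo_worker *w = arg;

	printf("worker %d running\n", w->id);
	return NULL;
}

/* create *and* start a worker; returns NULL on failure */
static struct demo_worker *demo_create_worker(int id)
{
	struct demo_worker *w = calloc(1, sizeof(*w));

	if (!w)
		return NULL;
	w->id = id;
	if (pthread_create(&w->thread, NULL, demo_worker_fn, w)) {
		free(w);
		return NULL;
	}
	return w;
}

int main(void)
{
	struct demo_worker *w = demo_create_worker(0);

	if (!w)		/* mirrors: if (!create_worker(pool)) goto fail; */
		return 1;
	pthread_join(w->thread, NULL);
	free(w);
	return 0;
}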