diff options
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r-- | kernel/workqueue.c | 107 |
1 files changed, 104 insertions, 3 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e163e7a7f5e5..4026d1871407 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -648,7 +648,7 @@ static void set_work_pool_and_clear_pending(struct work_struct *work, * The following mb guarantees that previous clear of a PENDING bit * will not be reordered with any speculative LOADS or STORES from * work->current_func, which is executed afterwards. This possible - * reordering can lead to a missed execution on attempt to qeueue + * reordering can lead to a missed execution on attempt to queue * the same @work. E.g. consider this case: * * CPU#0 CPU#1 @@ -920,6 +920,16 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task) * CONTEXT: * spin_lock_irq(rq->lock) * + * This function is called during schedule() when a kworker is going + * to sleep. It's used by psi to identify aggregation workers during + * dequeuing, to allow periodic aggregation to shut-off when that + * worker is the last task in the system or cgroup to go to sleep. + * + * As this function doesn't involve any workqueue-related locking, it + * only returns stable values when called from inside the scheduler's + * queuing and dequeuing paths, when @task, which must be a kworker, + * is guaranteed to not be processing any works. + * * Return: * The last work function %current executed as a worker, NULL if it * hasn't executed any work yet. @@ -1343,7 +1353,7 @@ static bool is_chained_work(struct workqueue_struct *wq) worker = current_wq_worker(); /* - * Return %true iff I'm a worker execuing a work item on @wq. If + * Return %true iff I'm a worker executing a work item on @wq. If * I'm @worker, it's safe to dereference it without locking. */ return worker && worker->current_pwq->wq == wq; @@ -1514,6 +1524,90 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL(queue_work_on); +/** + * workqueue_select_cpu_near - Select a CPU based on NUMA node + * @node: NUMA node ID that we want to select a CPU from + * + * This function will attempt to find a "random" cpu available on a given + * node. If there are no CPUs available on the given node it will return + * WORK_CPU_UNBOUND indicating that we should just schedule to any + * available CPU if we need to schedule this work. + */ +static int workqueue_select_cpu_near(int node) +{ + int cpu; + + /* No point in doing this if NUMA isn't enabled for workqueues */ + if (!wq_numa_enabled) + return WORK_CPU_UNBOUND; + + /* Delay binding to CPU if node is not valid or online */ + if (node < 0 || node >= MAX_NUMNODES || !node_online(node)) + return WORK_CPU_UNBOUND; + + /* Use local node/cpu if we are already there */ + cpu = raw_smp_processor_id(); + if (node == cpu_to_node(cpu)) + return cpu; + + /* Use "random" otherwise know as "first" online CPU of node */ + cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask); + + /* If CPU is valid return that, otherwise just defer */ + return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND; +} + +/** + * queue_work_node - queue work on a "random" cpu for a given NUMA node + * @node: NUMA node that we are targeting the work for + * @wq: workqueue to use + * @work: work to queue + * + * We queue the work to a "random" CPU within a given NUMA node. The basic + * idea here is to provide a way to somehow associate work with a given + * NUMA node. + * + * This function will only make a best effort attempt at getting this onto + * the right NUMA node. If no node is requested or the requested node is + * offline then we just fall back to standard queue_work behavior. + * + * Currently the "random" CPU ends up being the first available CPU in the + * intersection of cpu_online_mask and the cpumask of the node, unless we + * are running on the node. In that case we just use the current CPU. + * + * Return: %false if @work was already on a queue, %true otherwise. + */ +bool queue_work_node(int node, struct workqueue_struct *wq, + struct work_struct *work) +{ + unsigned long flags; + bool ret = false; + + /* + * This current implementation is specific to unbound workqueues. + * Specifically we only return the first available CPU for a given + * node instead of cycling through individual CPUs within the node. + * + * If this is used with a per-cpu workqueue then the logic in + * workqueue_select_cpu_near would need to be updated to allow for + * some round robin type logic. + */ + WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND)); + + local_irq_save(flags); + + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { + int cpu = workqueue_select_cpu_near(node); + + __queue_work(cpu, wq, work); + ret = true; + } + + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(queue_work_node); + void delayed_work_timer_fn(struct timer_list *t) { struct delayed_work *dwork = from_timer(dwork, t, timer); @@ -1641,7 +1735,7 @@ static void rcu_work_rcufn(struct rcu_head *rcu) * * Return: %false if @rwork was already pending, %true otherwise. Note * that a full RCU grace period is guaranteed only after a %true return. - * While @rwork is guarnateed to be executed after a %false return, the + * While @rwork is guaranteed to be executed after a %false return, the * execution may happen before a full RCU grace period has passed. */ bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork) @@ -2933,6 +3027,9 @@ static bool __flush_work(struct work_struct *work, bool from_cancel) if (WARN_ON(!wq_online)) return false; + if (WARN_ON(!work->func)) + return false; + if (!from_cancel) { lock_map_acquire(&work->lockdep_map); lock_map_release(&work->lockdep_map); @@ -3348,6 +3445,8 @@ static void wq_init_lockdep(struct workqueue_struct *wq) lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name); if (!lock_name) lock_name = wq->name; + + wq->lock_name = lock_name; lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0); } @@ -4194,6 +4293,8 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, return wq; err_free_wq: + wq_unregister_lockdep(wq); + wq_free_lockdep(wq); free_workqueue_attrs(wq->unbound_attrs); kfree(wq); return NULL; |