aboutsummaryrefslogtreecommitdiff
path: root/kernel/workqueue.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--kernel/workqueue.c142
1 files changed, 135 insertions, 7 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b5aba0e5a699..8d465478adb9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -285,6 +285,16 @@ struct wq_flusher {
struct wq_device;
/*
+ * Unlike in a per-cpu workqueue where max_active limits its concurrency level
+ * on each CPU, in an unbound workqueue, max_active applies to the whole system.
+ * As sharing a single nr_active across multiple sockets can be very expensive,
+ * the counting and enforcement is per NUMA node.
+ */
+struct wq_node_nr_active {
+ atomic_t nr; /* per-node nr_active count */
+};
+
+/*
* The externally visible workqueue. It relays the issued work items to
* the appropriate worker_pool through its pool_workqueues.
*/
@@ -330,6 +340,7 @@ struct workqueue_struct {
/* hot fields used during command issue, aligned to cacheline */
unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */
+ struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */
};
static struct kmem_cache *pwq_cache;
@@ -1426,6 +1437,31 @@ work_func_t wq_worker_last_func(struct task_struct *task)
}
/**
+ * wq_node_nr_active - Determine wq_node_nr_active to use
+ * @wq: workqueue of interest
+ * @node: NUMA node, can be %NUMA_NO_NODE
+ *
+ * Determine wq_node_nr_active to use for @wq on @node. Returns:
+ *
+ * - %NULL for per-cpu workqueues as they don't need to use shared nr_active.
+ *
+ * - node_nr_active[nr_node_ids] if @node is %NUMA_NO_NODE.
+ *
+ * - Otherwise, node_nr_active[@node].
+ */
+static struct wq_node_nr_active *wq_node_nr_active(struct workqueue_struct *wq,
+ int node)
+{
+ if (!(wq->flags & WQ_UNBOUND))
+ return NULL;
+
+ if (node == NUMA_NO_NODE)
+ node = nr_node_ids;
+
+ return wq->node_nr_active[node];
+}
+
+/**
* get_pwq - get an extra reference on the specified pool_workqueue
* @pwq: pool_workqueue to get
*
@@ -1506,12 +1542,17 @@ static bool pwq_activate_work(struct pool_workqueue *pwq,
struct work_struct *work)
{
struct worker_pool *pool = pwq->pool;
+ struct wq_node_nr_active *nna;
lockdep_assert_held(&pool->lock);
if (!(*work_data_bits(work) & WORK_STRUCT_INACTIVE))
return false;
+ nna = wq_node_nr_active(pwq->wq, pool->node);
+ if (nna)
+ atomic_inc(&nna->nr);
+
pwq->nr_active++;
__pwq_activate_work(pwq, work);
return true;
@@ -1528,14 +1569,18 @@ static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq)
{
struct workqueue_struct *wq = pwq->wq;
struct worker_pool *pool = pwq->pool;
+ struct wq_node_nr_active *nna = wq_node_nr_active(wq, pool->node);
bool obtained;
lockdep_assert_held(&pool->lock);
obtained = pwq->nr_active < READ_ONCE(wq->max_active);
- if (obtained)
+ if (obtained) {
pwq->nr_active++;
+ if (nna)
+ atomic_inc(&nna->nr);
+ }
return obtained;
}
@@ -1572,10 +1617,26 @@ static bool pwq_activate_first_inactive(struct pool_workqueue *pwq)
static void pwq_dec_nr_active(struct pool_workqueue *pwq)
{
struct worker_pool *pool = pwq->pool;
+ struct wq_node_nr_active *nna = wq_node_nr_active(pwq->wq, pool->node);
lockdep_assert_held(&pool->lock);
+ /*
+ * @pwq->nr_active should be decremented for both percpu and unbound
+ * workqueues.
+ */
pwq->nr_active--;
+
+ /*
+ * For a percpu workqueue, it's simple. Just need to kick the first
+ * inactive work item on @pwq itself.
+ */
+ if (!nna) {
+ pwq_activate_first_inactive(pwq);
+ return;
+ }
+
+ atomic_dec(&nna->nr);
pwq_activate_first_inactive(pwq);
}
@@ -4039,11 +4100,63 @@ static void wq_free_lockdep(struct workqueue_struct *wq)
}
#endif
+static void free_node_nr_active(struct wq_node_nr_active **nna_ar)
+{
+ int node;
+
+ for_each_node(node) {
+ kfree(nna_ar[node]);
+ nna_ar[node] = NULL;
+ }
+
+ kfree(nna_ar[nr_node_ids]);
+ nna_ar[nr_node_ids] = NULL;
+}
+
+static void init_node_nr_active(struct wq_node_nr_active *nna)
+{
+ atomic_set(&nna->nr, 0);
+}
+
+/*
+ * Each node's nr_active counter will be accessed mostly from its own node and
+ * should be allocated in the node.
+ */
+static int alloc_node_nr_active(struct wq_node_nr_active **nna_ar)
+{
+ struct wq_node_nr_active *nna;
+ int node;
+
+ for_each_node(node) {
+ nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, node);
+ if (!nna)
+ goto err_free;
+ init_node_nr_active(nna);
+ nna_ar[node] = nna;
+ }
+
+ /* [nr_node_ids] is used as the fallback */
+ nna = kzalloc_node(sizeof(*nna), GFP_KERNEL, NUMA_NO_NODE);
+ if (!nna)
+ goto err_free;
+ init_node_nr_active(nna);
+ nna_ar[nr_node_ids] = nna;
+
+ return 0;
+
+err_free:
+ free_node_nr_active(nna_ar);
+ return -ENOMEM;
+}
+
static void rcu_free_wq(struct rcu_head *rcu)
{
struct workqueue_struct *wq =
container_of(rcu, struct workqueue_struct, rcu);
+ if (wq->flags & WQ_UNBOUND)
+ free_node_nr_active(wq->node_nr_active);
+
wq_free_lockdep(wq);
free_percpu(wq->cpu_pwq);
free_workqueue_attrs(wq->unbound_attrs);
@@ -4785,7 +4898,8 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
{
va_list args;
struct workqueue_struct *wq;
- int len;
+ size_t wq_size;
+ int name_len;
/*
* Unbound && max_active == 1 used to imply ordered, which is no longer
@@ -4801,7 +4915,12 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
flags |= WQ_UNBOUND;
/* allocate wq and format name */
- wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+ if (flags & WQ_UNBOUND)
+ wq_size = struct_size(wq, node_nr_active, nr_node_ids + 1);
+ else
+ wq_size = sizeof(*wq);
+
+ wq = kzalloc(wq_size, GFP_KERNEL);
if (!wq)
return NULL;
@@ -4812,11 +4931,12 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
}
va_start(args, max_active);
- len = vsnprintf(wq->name, sizeof(wq->name), fmt, args);
+ name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args);
va_end(args);
- if (len >= WQ_NAME_LEN)
- pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n", wq->name);
+ if (name_len >= WQ_NAME_LEN)
+ pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n",
+ wq->name);
max_active = max_active ?: WQ_DFL_ACTIVE;
max_active = wq_clamp_max_active(max_active, flags, wq->name);
@@ -4835,8 +4955,13 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
wq_init_lockdep(wq);
INIT_LIST_HEAD(&wq->list);
+ if (flags & WQ_UNBOUND) {
+ if (alloc_node_nr_active(wq->node_nr_active) < 0)
+ goto err_unreg_lockdep;
+ }
+
if (alloc_and_link_pwqs(wq) < 0)
- goto err_unreg_lockdep;
+ goto err_free_node_nr_active;
if (wq_online && init_rescuer(wq) < 0)
goto err_destroy;
@@ -4861,6 +4986,9 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
return wq;
+err_free_node_nr_active:
+ if (wq->flags & WQ_UNBOUND)
+ free_node_nr_active(wq->node_nr_active);
err_unreg_lockdep:
wq_unregister_lockdep(wq);
wq_free_lockdep(wq);