author    Linus Torvalds <[email protected]>  2024-11-20 09:41:11 -0800
committer Linus Torvalds <[email protected]>  2024-11-20 09:41:11 -0800
commit    d6b6d39054fa3db4326e73e09576ed5f758ecd2b (patch)
tree      709739d7a9b8d3bc81e202710b46ffc475eca5d1
parent    a0e752bda210e7ff61c37ef3f7898bcbcd2693cb (diff)
parent    85f0d8e39affb7b88401b1e0542230a7af985b96 (diff)
Merge tag 'wq-for-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue updates from Tejun Heo:

 - The maximum concurrency limit of 512, which was set a long time ago,
   is too low now. A legitimate use (BPF cgroup release) of system_wq
   could saturate it under stress test conditions, leading to false
   dependencies and deadlocks. While the offending use was switched to
   a dedicated workqueue, use the opportunity to bump WQ_MAX_ACTIVE
   four fold and document that the system workqueues shouldn't be
   saturated. Workqueue should add at least a warning mechanism for
   cases where system workqueues are saturated.

 - Recent workqueue updates to support more flexible execution topology
   made unbound workqueues use per-cpu worker pool frontends, which
   pushed up workqueue flush overhead. As consecutive CPUs are likely
   to be pointing to the same worker pool, reduce overhead by switching
   locks only when necessary.

* tag 'wq-for-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: Reduce expensive locks for unbound workqueue
  workqueue: Adjust WQ_MAX_ACTIVE from 512 to 2048
  workqueue: doc: Add a note saturating the system_wq is not permitted
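As an aside to the first item above (and the documentation note added in the
workqueue.rst hunk below), here is a minimal sketch of what "utilize its own
dedicated workqueue" looks like in practice. The names my_wq, my_work_fn,
my_init and my_exit are illustrative only and are not part of this pull;
WQ_UNBOUND is just an example flag, and max_active of 0 selects the default:

    #include <linux/module.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *my_wq;

    static void my_work_fn(struct work_struct *work)
    {
            /* work that may have many outstanding instances under load */
    }
    static DECLARE_WORK(my_work, my_work_fn);

    static int __init my_init(void)
    {
            /* own queue instead of system_wq; 0 picks the default max_active */
            my_wq = alloc_workqueue("my_wq", WQ_UNBOUND, 0);
            if (!my_wq)
                    return -ENOMEM;
            queue_work(my_wq, &my_work);
            return 0;
    }

    static void __exit my_exit(void)
    {
            destroy_workqueue(my_wq);
    }

    module_init(my_init);
    module_exit(my_exit);
    MODULE_LICENSE("GPL");

The point is that a producer which can exceed @max_active outstanding work
items gets its own queue rather than sharing system_wq with unrelated users.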
-rw-r--r--  Documentation/core-api/workqueue.rst  |  9
-rw-r--r--  include/linux/workqueue.h             |  2
-rw-r--r--  kernel/workqueue.c                     | 22
3 files changed, 26 insertions, 7 deletions
diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst
index 16f861c9791e..e295835fc116 100644
--- a/Documentation/core-api/workqueue.rst
+++ b/Documentation/core-api/workqueue.rst
@@ -245,8 +245,8 @@ CPU which can be assigned to the work items of a wq. For example, with
at the same time per CPU. This is always a per-CPU attribute, even for
unbound workqueues.
-The maximum limit for ``@max_active`` is 512 and the default value used
-when 0 is specified is 256. These values are chosen sufficiently high
+The maximum limit for ``@max_active`` is 2048 and the default value used
+when 0 is specified is 1024. These values are chosen sufficiently high
such that they are not the limiting factor while providing protection in
runaway cases.
@@ -357,6 +357,11 @@ Guidelines
difference in execution characteristics between using a dedicated wq
and a system wq.
+ Note: If something may generate more than @max_active outstanding
+ work items (do stress test your producers), it may saturate a system
+ wq and potentially lead to deadlock. It should utilize its own
+ dedicated workqueue rather than the system wq.
+
* Unless work items are expected to consume a huge amount of CPU
cycles, using a bound wq is usually beneficial due to the increased
level of locality in wq operations and work item execution.
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 59c2695e12e7..b0dc957c3e56 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -412,7 +412,7 @@ enum wq_flags {
};
enum wq_consts {
- WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */
+ WQ_MAX_ACTIVE = 2048, /* I like 2048, better ideas? */
WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE,
WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2,
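(Since WQ_DFL_ACTIVE is defined as WQ_MAX_ACTIVE / 2, the default rises from
256 to 2048 / 2 = 1024, matching the documentation change above.)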
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9949ffad8df0..8b07576814a5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3833,16 +3833,28 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
{
bool wait = false;
struct pool_workqueue *pwq;
+ struct worker_pool *current_pool = NULL;
if (flush_color >= 0) {
WARN_ON_ONCE(atomic_read(&wq->nr_pwqs_to_flush));
atomic_set(&wq->nr_pwqs_to_flush, 1);
}
+ /*
+ * For unbound workqueue, pwqs will map to only a few pools.
+ * Most of the time, pwqs within the same pool will be linked
+ * sequentially to wq->pwqs by cpu index. So in the majority
+ * of pwq iters, the pool is the same, only doing lock/unlock
+ * if the pool has changed. This can largely reduce expensive
+ * lock operations.
+ */
for_each_pwq(pwq, wq) {
- struct worker_pool *pool = pwq->pool;
-
- raw_spin_lock_irq(&pool->lock);
+ if (current_pool != pwq->pool) {
+ if (likely(current_pool))
+ raw_spin_unlock_irq(&current_pool->lock);
+ current_pool = pwq->pool;
+ raw_spin_lock_irq(&current_pool->lock);
+ }
if (flush_color >= 0) {
WARN_ON_ONCE(pwq->flush_color != -1);
@@ -3859,9 +3871,11 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
pwq->work_color = work_color;
}
- raw_spin_unlock_irq(&pool->lock);
}
+ if (current_pool)
+ raw_spin_unlock_irq(&current_pool->lock);
+
if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_pwqs_to_flush))
complete(&wq->first_flusher->done);
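The lock-switching pattern introduced above can also be shown in isolation.
Below is a stand-alone user-space analogue of the same idea using pthread
mutexes, not kernel code: hold the current group's lock across consecutive
items and only unlock/relock when the owning group changes. struct pool,
struct item and flush_items() are hypothetical names for illustration:

    #include <pthread.h>
    #include <stdio.h>

    struct pool { pthread_mutex_t lock; int flushed; };
    struct item { struct pool *pool; };

    static void flush_items(struct item *items, int n)
    {
            struct pool *current_pool = NULL;
            int i;

            for (i = 0; i < n; i++) {
                    /* Switch locks only when the backing pool changes. */
                    if (current_pool != items[i].pool) {
                            if (current_pool)
                                    pthread_mutex_unlock(&current_pool->lock);
                            current_pool = items[i].pool;
                            pthread_mutex_lock(&current_pool->lock);
                    }
                    current_pool->flushed++;
            }
            if (current_pool)
                    pthread_mutex_unlock(&current_pool->lock);
    }

    int main(void)
    {
            struct pool a = { PTHREAD_MUTEX_INITIALIZER, 0 };
            struct pool b = { PTHREAD_MUTEX_INITIALIZER, 0 };
            struct item items[] = { { &a }, { &a }, { &b }, { &b }, { &a } };

            /* Five items, but only three lock/unlock cycles instead of five. */
            flush_items(items, 5);
            printf("a flushed %d, b flushed %d\n", a.flushed, b.flushed);
            return 0;
    }

Because the pwqs of an unbound workqueue map to only a few pools and are
linked to wq->pwqs in CPU order, most iterations hit the "same pool" fast
path, which is where the flush-overhead reduction comes from.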