From e5cba24e3f018d4beb6acd101a82483c98f91ce7 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Fri, 26 Nov 2010 12:06:44 +0100 Subject: workqueue: check the allocation of system_unbound_wq I found a trivial bug on initialization of workqueue. Current init_workqueues doesn't check the result of allocation of system_unbound_wq, this should be checked like other queues. Signed-off-by: Hitoshi Mitake Cc: Arjan van de Ven Cc: David Howells Signed-off-by: Tejun Heo --- kernel/workqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 90db1bd1a978..ca017ce8bc6b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3692,7 +3692,8 @@ static int __init init_workqueues(void) system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); - BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq); + BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || + !system_unbound_wq); return 0; } early_initcall(init_workqueues); -- cgit From 2d64672ed38721b7a3815009d79bfb90a1f34a17 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 3 Dec 2010 23:12:33 -0500 Subject: workqueue: It is likely that WORKER_NOT_RUNNING is true Running the annotate branch profiler on three boxes, including my main box that runs firefox, evolution, xchat, and is part of the distcc farm, showed this with the likelys in the workqueue code: correct incorrect % Function File Line ------- --------- - -------- ---- ---- 96 996253 99 wq_worker_sleeping workqueue.c 703 96 996247 99 wq_worker_waking_up workqueue.c 677 The likely()s in this case were assuming that WORKER_NOT_RUNNING will most likely be false. But this is not the case. The reason is (and shown by adding trace_printks and testing it) that most of the time WORKER_PREP is set. In worker_thread() we have: worker_clr_flags(worker, WORKER_PREP); [ do work stuff ] worker_set_flags(worker, WORKER_PREP, false); (that 'false' means not to wake up an idle worker) The wq_worker_sleeping() is called from schedule when a worker thread is putting itself to sleep. Which happens most of the time outside of that [ do work stuff ]. The wq_worker_waking_up is called by the wakeup worker code, which is also callod outside that [ do work stuff ]. Thus, the likely and unlikely used by those two functions are actually backwards. Remove the annotation and let gcc figure it out. Acked-by: Tejun Heo Signed-off-by: Steven Rostedt Signed-off-by: Tejun Heo --- kernel/workqueue.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ca017ce8bc6b..e785b0f2aea5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -661,7 +661,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) { struct worker *worker = kthread_data(task); - if (likely(!(worker->flags & WORKER_NOT_RUNNING))) + if (!(worker->flags & WORKER_NOT_RUNNING)) atomic_inc(get_gcwq_nr_running(cpu)); } @@ -687,7 +687,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, struct global_cwq *gcwq = get_gcwq(cpu); atomic_t *nr_running = get_gcwq_nr_running(cpu); - if (unlikely(worker->flags & WORKER_NOT_RUNNING)) + if (worker->flags & WORKER_NOT_RUNNING) return NULL; /* this can only happen on the local cpu */ -- cgit From c8efcc2589464ac70255bb83e10cad61c7c6d295 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 20 Dec 2010 19:32:04 +0100 Subject: workqueue: allow chained queueing during destruction Currently, destroy_workqueue() makes the workqueue deny all new queueing by setting WQ_DYING and flushes the workqueue once before proceeding with destruction; however, there are cases where work items queue more related work items. Currently, such users need to explicitly flush the workqueue multiple times depending on the possible depth of such chained queueing. This patch updates the queueing path such that a work item can queue further work items on the same workqueue even when WQ_DYING is set. The flush on destruction is automatically retried until the workqueue is empty. This guarantees that the workqueue is empty on destruction while allowing chained queueing. The flush retry logic whines if it takes too many retries to drain the workqueue. Signed-off-by: Tejun Heo Cc: James Bottomley --- kernel/workqueue.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e785b0f2aea5..8ee6ec82f88a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -932,6 +932,38 @@ static void insert_work(struct cpu_workqueue_struct *cwq, wake_up_worker(gcwq); } +/* + * Test whether @work is being queued from another work executing on the + * same workqueue. This is rather expensive and should only be used from + * cold paths. + */ +static bool is_chained_work(struct workqueue_struct *wq) +{ + unsigned long flags; + unsigned int cpu; + + for_each_gcwq_cpu(cpu) { + struct global_cwq *gcwq = get_gcwq(cpu); + struct worker *worker; + struct hlist_node *pos; + int i; + + spin_lock_irqsave(&gcwq->lock, flags); + for_each_busy_worker(worker, i, pos, gcwq) { + if (worker->task != current) + continue; + spin_unlock_irqrestore(&gcwq->lock, flags); + /* + * I'm @worker, no locking necessary. See if @work + * is headed to the same workqueue. + */ + return worker->current_cwq->wq == wq; + } + spin_unlock_irqrestore(&gcwq->lock, flags); + } + return false; +} + static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct work_struct *work) { @@ -943,7 +975,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, debug_work_activate(work); - if (WARN_ON_ONCE(wq->flags & WQ_DYING)) + /* if dying, only works from the same workqueue are allowed */ + if (unlikely(wq->flags & WQ_DYING) && + WARN_ON_ONCE(!is_chained_work(wq))) return; /* determine gcwq to use */ @@ -2936,11 +2970,35 @@ EXPORT_SYMBOL_GPL(__alloc_workqueue_key); */ void destroy_workqueue(struct workqueue_struct *wq) { + unsigned int flush_cnt = 0; unsigned int cpu; + /* + * Mark @wq dying and drain all pending works. Once WQ_DYING is + * set, only chain queueing is allowed. IOW, only currently + * pending or running work items on @wq can queue further work + * items on it. @wq is flushed repeatedly until it becomes empty. + * The number of flushing is detemined by the depth of chaining and + * should be relatively short. Whine if it takes too long. + */ wq->flags |= WQ_DYING; +reflush: flush_workqueue(wq); + for_each_cwq_cpu(cpu, wq) { + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + + if (!cwq->nr_active && list_empty(&cwq->delayed_works)) + continue; + + if (++flush_cnt == 10 || + (flush_cnt % 100 == 0 && flush_cnt <= 1000)) + printk(KERN_WARNING "workqueue %s: flush on " + "destruction isn't complete after %u tries\n", + wq->name, flush_cnt); + goto reflush; + } + /* * wq list is used to freeze wq, remove from list after * flushing is complete in case freeze races us. -- cgit From e159489baa717dbae70f9903770a6a4990865887 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 9 Jan 2011 23:32:15 +0100 Subject: workqueue: relax lockdep annotation on flush_work() Currently, the lockdep annotation in flush_work() requires exclusive access on the workqueue the target work is queued on and triggers warning if a work is trying to flush another work on the same workqueue; however, this is no longer true as workqueues can now execute multiple works concurrently. This patch adds lock_map_acquire_read() and make process_one_work() hold read access to the workqueue while executing a work and start_flush_work() check for write access if concurrnecy level is one or the workqueue has a rescuer (as only one execution resource - the rescuer - is guaranteed to be available under memory pressure), and read access if higher. This better represents what's going on and removes spurious lockdep warnings which are triggered by fake dependency chain created through flush_work(). * Peter pointed out that flushing another work from a WQ_MEM_RECLAIM wq breaks forward progress guarantee under memory pressure. Condition check accordingly updated. Signed-off-by: Tejun Heo Reported-by: "Rafael J. Wysocki" Tested-by: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: stable@kernel.org --- include/linux/lockdep.h | 3 +++ kernel/workqueue.c | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 71c09b26c759..9f19430c7d07 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -522,12 +522,15 @@ static inline void print_irqtrace_events(struct task_struct *curr) #ifdef CONFIG_DEBUG_LOCK_ALLOC # ifdef CONFIG_PROVE_LOCKING # define lock_map_acquire(l) lock_acquire(l, 0, 0, 0, 2, NULL, _THIS_IP_) +# define lock_map_acquire_read(l) lock_acquire(l, 0, 0, 2, 2, NULL, _THIS_IP_) # else # define lock_map_acquire(l) lock_acquire(l, 0, 0, 0, 1, NULL, _THIS_IP_) +# define lock_map_acquire_read(l) lock_acquire(l, 0, 0, 2, 1, NULL, _THIS_IP_) # endif # define lock_map_release(l) lock_release(l, 1, _THIS_IP_) #else # define lock_map_acquire(l) do { } while (0) +# define lock_map_acquire_read(l) do { } while (0) # define lock_map_release(l) do { } while (0) #endif diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8ee6ec82f88a..930c2390b77e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1840,7 +1840,7 @@ __acquires(&gcwq->lock) spin_unlock_irq(&gcwq->lock); work_clear_pending(work); - lock_map_acquire(&cwq->wq->lockdep_map); + lock_map_acquire_read(&cwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); trace_workqueue_execute_start(work); f(work); @@ -2384,8 +2384,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, insert_wq_barrier(cwq, barr, work, worker); spin_unlock_irq(&gcwq->lock); - lock_map_acquire(&cwq->wq->lockdep_map); + /* + * If @max_active is 1 or rescuer is in use, flushing another work + * item on the same workqueue may lead to deadlock. Make sure the + * flusher is not running on the same workqueue by verifying write + * access. + */ + if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER) + lock_map_acquire(&cwq->wq->lockdep_map); + else + lock_map_acquire_read(&cwq->wq->lockdep_map); lock_map_release(&cwq->wq->lockdep_map); + return true; already_gone: spin_unlock_irq(&gcwq->lock); -- cgit From 42c025f3de9042d9c9abd9a6f6205d1a0f4bcadf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 11 Jan 2011 15:58:49 +0100 Subject: workqueue: note the nested NOT_RUNNING test in worker_clr_flags() isn't a noop The nested NOT_RUNNING test in worker_clr_flags() is slightly misleading in that if NOT_RUNNING were a single flag the nested test would be always %true and thus noop. Add a comment noting that the test isn't a noop. Signed-off-by: Tejun Heo Cc: Hillf Danton Cc: Andrew Morton --- kernel/workqueue.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 930c2390b77e..11869faa6819 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -768,7 +768,11 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) worker->flags &= ~flags; - /* if transitioning out of NOT_RUNNING, increment nr_running */ + /* + * If transitioning out of NOT_RUNNING, increment nr_running. Note + * that the nested NOT_RUNNING is not a noop. NOT_RUNNING is mask + * of multiple flags, not a single flag. + */ if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) if (!(worker->flags & WORKER_NOT_RUNNING)) atomic_inc(get_gcwq_nr_running(gcwq->cpu)); -- cgit From 4149efb22da66e326fc48baf80d628834509f7f0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Feb 2011 10:39:03 +0100 Subject: workqueue: add system_freezeable_wq Add system wide freezeable workqueue. Signed-off-by: Tejun Heo Acked-by: Dmitry Torokhov Cc: "Rafael J. Wysocki" --- include/linux/workqueue.h | 4 ++++ kernel/workqueue.c | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel/workqueue.c') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1ac11586a2f5..de6a755befac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -286,11 +286,15 @@ enum { * any specific CPU, not concurrency managed, and all queued works are * executed immediately as long as max_active limit is not reached and * resources are available. + * + * system_freezeable_wq is equivalent to system_wq except that it's + * freezeable. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_nrt_wq; extern struct workqueue_struct *system_unbound_wq; +extern struct workqueue_struct *system_freezeable_wq; extern struct workqueue_struct * __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 11869faa6819..28f8bd08f0e7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -249,10 +249,12 @@ struct workqueue_struct *system_wq __read_mostly; struct workqueue_struct *system_long_wq __read_mostly; struct workqueue_struct *system_nrt_wq __read_mostly; struct workqueue_struct *system_unbound_wq __read_mostly; +struct workqueue_struct *system_freezeable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_wq); EXPORT_SYMBOL_GPL(system_long_wq); EXPORT_SYMBOL_GPL(system_nrt_wq); EXPORT_SYMBOL_GPL(system_unbound_wq); +EXPORT_SYMBOL_GPL(system_freezeable_wq); #define CREATE_TRACE_POINTS #include @@ -3764,8 +3766,10 @@ static int __init init_workqueues(void) system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); + system_freezeable_wq = alloc_workqueue("events_freezeable", + WQ_FREEZEABLE, 0); BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || - !system_unbound_wq); + !system_unbound_wq || !system_freezeable_wq); return 0; } early_initcall(init_workqueues); -- cgit From 7576958a9d5a4a677ad7dd40901cdbb6c1110c98 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 14 Feb 2011 14:04:46 +0100 Subject: workqueue: wake up a worker when a rescuer is leaving a gcwq After executing the matching works, a rescuer leaves the gcwq whether there are more pending works or not. This may decrease the concurrency level to zero and stall execution until a new work item is queued on the gcwq. Make rescuer wake up a regular worker when it leaves a gcwq if there are more works to execute, so that execution isn't stalled. Signed-off-by: Tejun Heo Reported-by: Ray Jui Cc: stable@kernel.org --- kernel/workqueue.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 11869faa6819..90a17ca2ad0b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2047,6 +2047,15 @@ repeat: move_linked_works(work, scheduled, &n); process_scheduled_works(rescuer); + + /* + * Leave this gcwq. If keep_working() is %true, notify a + * regular worker; otherwise, we end up with 0 concurrency + * and stalling the execution. + */ + if (keep_working(gcwq)) + wake_up_worker(gcwq); + spin_unlock_irq(&gcwq->lock); } -- cgit From 58a69cb47ec6991bf006a3e5d202e8571b0327a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 09:25:31 +0100 Subject: workqueue, freezer: unify spelling of 'freeze' + 'able' to 'freezable' There are two spellings in use for 'freeze' + 'able' - 'freezable' and 'freezeable'. The former is the more prominent one. The latter is mostly used by workqueue and in a few other odd places. Unify the spelling to 'freezable'. Signed-off-by: Tejun Heo Reported-by: Alan Stern Acked-by: "Rafael J. Wysocki" Acked-by: Greg Kroah-Hartman Acked-by: Dmitry Torokhov Cc: David Woodhouse Cc: Alex Dubov Cc: "David S. Miller" Cc: Steven Whitehouse --- Documentation/workqueue.txt | 4 ++-- drivers/memstick/core/memstick.c | 2 +- drivers/misc/tifm_core.c | 2 +- drivers/misc/vmw_balloon.c | 2 +- drivers/mtd/nand/r852.c | 2 +- drivers/mtd/sm_ftl.c | 2 +- drivers/net/can/mcp251x.c | 2 +- drivers/tty/serial/max3100.c | 2 +- drivers/tty/serial/max3107.c | 2 +- fs/gfs2/glock.c | 4 ++-- fs/gfs2/main.c | 2 +- include/linux/freezer.h | 2 +- include/linux/sched.h | 2 +- include/linux/workqueue.h | 8 ++++---- kernel/power/main.c | 2 +- kernel/power/process.c | 6 +++--- kernel/workqueue.c | 24 ++++++++++++------------ 17 files changed, 35 insertions(+), 35 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt index 996a27d9b8db..01c513fac40e 100644 --- a/Documentation/workqueue.txt +++ b/Documentation/workqueue.txt @@ -190,9 +190,9 @@ resources, scheduled and executed. * Long running CPU intensive workloads which can be better managed by the system scheduler. - WQ_FREEZEABLE + WQ_FREEZABLE - A freezeable wq participates in the freeze phase of the system + A freezable wq participates in the freeze phase of the system suspend operations. Work items on the wq are drained and no new work item starts execution until thawed. diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index e9a3eab7b0cf..8c1d85e27be4 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -621,7 +621,7 @@ static int __init memstick_init(void) { int rc; - workqueue = create_freezeable_workqueue("kmemstick"); + workqueue = create_freezable_workqueue("kmemstick"); if (!workqueue) return -ENOMEM; diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c index 5f6852dff40b..44d4475a09dd 100644 --- a/drivers/misc/tifm_core.c +++ b/drivers/misc/tifm_core.c @@ -329,7 +329,7 @@ static int __init tifm_init(void) { int rc; - workqueue = create_freezeable_workqueue("tifm"); + workqueue = create_freezable_workqueue("tifm"); if (!workqueue) return -ENOMEM; diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 4d2ea8e80140..6df5a55da110 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -785,7 +785,7 @@ static int __init vmballoon_init(void) if (x86_hyper != &x86_hyper_vmware) return -ENODEV; - vmballoon_wq = create_freezeable_workqueue("vmmemctl"); + vmballoon_wq = create_freezable_workqueue("vmmemctl"); if (!vmballoon_wq) { pr_err("failed to create workqueue\n"); return -ENOMEM; diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c index d9d7efbc77cc..6322d1fb5d62 100644 --- a/drivers/mtd/nand/r852.c +++ b/drivers/mtd/nand/r852.c @@ -930,7 +930,7 @@ int r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) init_completion(&dev->dma_done); - dev->card_workqueue = create_freezeable_workqueue(DRV_NAME); + dev->card_workqueue = create_freezable_workqueue(DRV_NAME); if (!dev->card_workqueue) goto error9; diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c index 67822cf6c025..ac0d6a8613b5 100644 --- a/drivers/mtd/sm_ftl.c +++ b/drivers/mtd/sm_ftl.c @@ -1258,7 +1258,7 @@ static struct mtd_blktrans_ops sm_ftl_ops = { static __init int sm_module_init(void) { int error = 0; - cache_flush_workqueue = create_freezeable_workqueue("smflush"); + cache_flush_workqueue = create_freezable_workqueue("smflush"); if (IS_ERR(cache_flush_workqueue)) return PTR_ERR(cache_flush_workqueue); diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c index 7ab534aee452..7513c4523ac4 100644 --- a/drivers/net/can/mcp251x.c +++ b/drivers/net/can/mcp251x.c @@ -940,7 +940,7 @@ static int mcp251x_open(struct net_device *net) goto open_unlock; } - priv->wq = create_freezeable_workqueue("mcp251x_wq"); + priv->wq = create_freezable_workqueue("mcp251x_wq"); INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler); INIT_WORK(&priv->restart_work, mcp251x_restart_work_handler); diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c index beb1afa27d8d..7b951adac54b 100644 --- a/drivers/tty/serial/max3100.c +++ b/drivers/tty/serial/max3100.c @@ -601,7 +601,7 @@ static int max3100_startup(struct uart_port *port) s->rts = 0; sprintf(b, "max3100-%d", s->minor); - s->workqueue = create_freezeable_workqueue(b); + s->workqueue = create_freezable_workqueue(b); if (!s->workqueue) { dev_warn(&s->spi->dev, "cannot create workqueue\n"); return -EBUSY; diff --git a/drivers/tty/serial/max3107.c b/drivers/tty/serial/max3107.c index 910870edf708..750b4f627315 100644 --- a/drivers/tty/serial/max3107.c +++ b/drivers/tty/serial/max3107.c @@ -833,7 +833,7 @@ static int max3107_startup(struct uart_port *port) struct max3107_port *s = container_of(port, struct max3107_port, port); /* Initialize work queue */ - s->workqueue = create_freezeable_workqueue("max3107"); + s->workqueue = create_freezable_workqueue("max3107"); if (!s->workqueue) { dev_err(&s->spi->dev, "Workqueue creation failed\n"); return -EBUSY; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 08a8beb152e6..7cd9a5a68d59 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1779,11 +1779,11 @@ int __init gfs2_glock_init(void) #endif glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_FREEZEABLE, 0); + WQ_HIGHPRI | WQ_FREEZABLE, 0); if (IS_ERR(glock_workqueue)) return PTR_ERR(glock_workqueue); gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, + WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (IS_ERR(gfs2_delete_workqueue)) { destroy_workqueue(glock_workqueue); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index ebef7ab6e17e..85ba027d1c4d 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -144,7 +144,7 @@ static int __init init_gfs2_fs(void) error = -ENOMEM; gfs_recovery_wq = alloc_workqueue("gfs_recovery", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); + WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (!gfs_recovery_wq) goto fail_wq; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index da7e52b099f3..1effc8b56b4e 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -109,7 +109,7 @@ static inline void freezer_count(void) } /* - * Check if the task should be counted as freezeable by the freezer + * Check if the task should be counted as freezable by the freezer */ static inline int freezer_should_skip(struct task_struct *p) { diff --git a/include/linux/sched.h b/include/linux/sched.h index d747f948b34e..777d8a5ed06b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1744,7 +1744,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ -#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ +#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ /* diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1ac11586a2f5..f7998a3bf020 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -250,7 +250,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ - WQ_FREEZEABLE = 1 << 2, /* freeze during suspend */ + WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ @@ -318,7 +318,7 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, /** * alloc_ordered_workqueue - allocate an ordered workqueue * @name: name of the workqueue - * @flags: WQ_* flags (only WQ_FREEZEABLE and WQ_MEM_RECLAIM are meaningful) + * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * * Allocate an ordered workqueue. An ordered workqueue executes at * most one work item at any given time in the queued order. They are @@ -335,8 +335,8 @@ alloc_ordered_workqueue(const char *name, unsigned int flags) #define create_workqueue(name) \ alloc_workqueue((name), WQ_MEM_RECLAIM, 1) -#define create_freezeable_workqueue(name) \ - alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1) +#define create_freezable_workqueue(name) \ + alloc_workqueue((name), WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1) #define create_singlethread_workqueue(name) \ alloc_workqueue((name), WQ_UNBOUND | WQ_MEM_RECLAIM, 1) diff --git a/kernel/power/main.c b/kernel/power/main.c index 7b5db6a8561e..701853042c28 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -326,7 +326,7 @@ EXPORT_SYMBOL_GPL(pm_wq); static int __init pm_start_workqueue(void) { - pm_wq = alloc_workqueue("pm", WQ_FREEZEABLE, 0); + pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0); return pm_wq ? 0 : -ENOMEM; } diff --git a/kernel/power/process.c b/kernel/power/process.c index d6d2a10320e0..0cf3a27a6c9d 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -22,7 +22,7 @@ */ #define TIMEOUT (20 * HZ) -static inline int freezeable(struct task_struct * p) +static inline int freezable(struct task_struct * p) { if ((p == current) || (p->flags & PF_NOFREEZE) || @@ -53,7 +53,7 @@ static int try_to_freeze_tasks(bool sig_only) todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (frozen(p) || !freezeable(p)) + if (frozen(p) || !freezable(p)) continue; if (!freeze_task(p, sig_only)) @@ -167,7 +167,7 @@ static void thaw_tasks(bool nosig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (!freezeable(p)) + if (!freezable(p)) continue; if (nosig_only && should_send_signal(p)) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 90a17ca2ad0b..88a3e34f51f6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2965,7 +2965,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, */ spin_lock(&workqueue_lock); - if (workqueue_freezing && wq->flags & WQ_FREEZEABLE) + if (workqueue_freezing && wq->flags & WQ_FREEZABLE) for_each_cwq_cpu(cpu, wq) get_cwq(cpu, wq)->max_active = 0; @@ -3077,7 +3077,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) spin_lock_irq(&gcwq->lock); - if (!(wq->flags & WQ_FREEZEABLE) || + if (!(wq->flags & WQ_FREEZABLE) || !(gcwq->flags & GCWQ_FREEZING)) get_cwq(gcwq->cpu, wq)->max_active = max_active; @@ -3327,7 +3327,7 @@ static int __cpuinit trustee_thread(void *__gcwq) * want to get it over with ASAP - spam rescuers, wake up as * many idlers as necessary and create new ones till the * worklist is empty. Note that if the gcwq is frozen, there - * may be frozen works in freezeable cwqs. Don't declare + * may be frozen works in freezable cwqs. Don't declare * completion while frozen. */ while (gcwq->nr_workers != gcwq->nr_idle || @@ -3585,9 +3585,9 @@ EXPORT_SYMBOL_GPL(work_on_cpu); /** * freeze_workqueues_begin - begin freezing workqueues * - * Start freezing workqueues. After this function returns, all - * freezeable workqueues will queue new works to their frozen_works - * list instead of gcwq->worklist. + * Start freezing workqueues. After this function returns, all freezable + * workqueues will queue new works to their frozen_works list instead of + * gcwq->worklist. * * CONTEXT: * Grabs and releases workqueue_lock and gcwq->lock's. @@ -3613,7 +3613,7 @@ void freeze_workqueues_begin(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (cwq && wq->flags & WQ_FREEZEABLE) + if (cwq && wq->flags & WQ_FREEZABLE) cwq->max_active = 0; } @@ -3624,7 +3624,7 @@ void freeze_workqueues_begin(void) } /** - * freeze_workqueues_busy - are freezeable workqueues still busy? + * freeze_workqueues_busy - are freezable workqueues still busy? * * Check whether freezing is complete. This function must be called * between freeze_workqueues_begin() and thaw_workqueues(). @@ -3633,8 +3633,8 @@ void freeze_workqueues_begin(void) * Grabs and releases workqueue_lock. * * RETURNS: - * %true if some freezeable workqueues are still busy. %false if - * freezing is complete. + * %true if some freezable workqueues are still busy. %false if freezing + * is complete. */ bool freeze_workqueues_busy(void) { @@ -3654,7 +3654,7 @@ bool freeze_workqueues_busy(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZABLE)) continue; BUG_ON(cwq->nr_active < 0); @@ -3699,7 +3699,7 @@ void thaw_workqueues(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZABLE)) continue; /* restore max_active and repopulate worklist */ -- cgit From 3233cdbd9fa347a6d6897a94cc6ed0302ae83c4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 18:10:19 +0100 Subject: workqueue: make sure MAYDAY_INITIAL_TIMEOUT is at least 2 jiffies long MAYDAY_INITIAL_TIMEOUT is defined as HZ / 100 and depending on configuration may end up 0 or 1. Even when it's 1, depending on when the mayday timer is added in the current jiffy interval, it may expire way before a jiffy has passed. Make sure MAYDAY_INITIAL_TIMEOUT is at least two to guarantee that at least a full jiffy has passed before calling rescuers. Signed-off-by: Tejun Heo Reported-by: Ray Jui Cc: stable@kernel.org --- kernel/workqueue.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 88a3e34f51f6..ee6578b578ad 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -79,7 +79,9 @@ enum { MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ - MAYDAY_INITIAL_TIMEOUT = HZ / 100, /* call for help after 10ms */ + MAYDAY_INITIAL_TIMEOUT = HZ / 100 >= 2 ? HZ / 100 : 2, + /* call for help after 10ms + (min two ticks) */ MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ CREATE_COOLDOWN = HZ, /* time to breath after fail */ TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ -- cgit From 24d51add7438f9696a7205927bf9de3c5c787a58 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Feb 2011 09:52:50 +0100 Subject: workqueue: fix build failure introduced by s/freezeable/freezable/ wq:fixes-2.6.38 does s/WQ_FREEZEABLE/WQ_FREEZABLE and wq:for-2.6.39 adds new usage of the flag. The combination of the two creates a build failure after merge. Fix it by renaming all freezeables to freezables. Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell --- include/linux/workqueue.h | 6 +++--- kernel/workqueue.c | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index d110cc4f9fed..f584aba78ca9 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -287,14 +287,14 @@ enum { * executed immediately as long as max_active limit is not reached and * resources are available. * - * system_freezeable_wq is equivalent to system_wq except that it's - * freezeable. + * system_freezable_wq is equivalent to system_wq except that it's + * freezable. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_nrt_wq; extern struct workqueue_struct *system_unbound_wq; -extern struct workqueue_struct *system_freezeable_wq; +extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct * __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 572f559f6cb9..1b64d225f067 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -251,12 +251,12 @@ struct workqueue_struct *system_wq __read_mostly; struct workqueue_struct *system_long_wq __read_mostly; struct workqueue_struct *system_nrt_wq __read_mostly; struct workqueue_struct *system_unbound_wq __read_mostly; -struct workqueue_struct *system_freezeable_wq __read_mostly; +struct workqueue_struct *system_freezable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_wq); EXPORT_SYMBOL_GPL(system_long_wq); EXPORT_SYMBOL_GPL(system_nrt_wq); EXPORT_SYMBOL_GPL(system_unbound_wq); -EXPORT_SYMBOL_GPL(system_freezeable_wq); +EXPORT_SYMBOL_GPL(system_freezable_wq); #define CREATE_TRACE_POINTS #include @@ -3777,10 +3777,10 @@ static int __init init_workqueues(void) system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); - system_freezeable_wq = alloc_workqueue("events_freezeable", - WQ_FREEZEABLE, 0); + system_freezable_wq = alloc_workqueue("events_freezable", + WQ_FREEZABLE, 0); BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || - !system_unbound_wq || !system_freezeable_wq); + !system_unbound_wq || !system_freezable_wq); return 0; } early_initcall(init_workqueues); -- cgit From 997772884036e6e121de39322179989154437d9f Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 7 Mar 2011 09:58:33 +0100 Subject: debugobjects: Add hint for better object identification In complex subsystems like mac80211 structures can contain several timers and work structs, so identifying a specific instance from the call trace and object type output of debugobjects can be hard. Allow the subsystems which support debugobjects to provide a hint function. This function returns a pointer to a kernel address (preferrably the objects callback function) which is printed along with the debugobjects type. Add hint methods for timer_list, work_struct and hrtimer. [ tglx: Massaged changelog, made it compile ] Signed-off-by: Stanislaw Gruszka LKML-Reference: <20110307085809.GA9334@redhat.com> Signed-off-by: Thomas Gleixner --- include/linux/debugobjects.h | 5 ++++- kernel/hrtimer.c | 6 ++++++ kernel/timer.c | 6 ++++++ kernel/workqueue.c | 6 ++++++ lib/debugobjects.c | 9 ++++++--- 5 files changed, 28 insertions(+), 4 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index 597692f1fc8d..65970b811e22 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -34,7 +34,10 @@ struct debug_obj { /** * struct debug_obj_descr - object type specific debug description structure + * * @name: name of the object typee + * @debug_hint: function returning address, which have associated + * kernel symbol, to allow identify the object * @fixup_init: fixup function, which is called when the init check * fails * @fixup_activate: fixup function, which is called when the activate check @@ -46,7 +49,7 @@ struct debug_obj { */ struct debug_obj_descr { const char *name; - + void *(*debug_hint) (void *addr); int (*fixup_init) (void *addr, enum debug_obj_state state); int (*fixup_activate) (void *addr, enum debug_obj_state state); int (*fixup_destroy) (void *addr, enum debug_obj_state state); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 0c8d7c048615..e38f5a073d01 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -334,6 +334,11 @@ EXPORT_SYMBOL_GPL(ktime_add_safe); static struct debug_obj_descr hrtimer_debug_descr; +static void *hrtimer_debug_hint(void *addr) +{ + return ((struct hrtimer *) addr)->function; +} + /* * fixup_init is called when: * - an active object is initialized @@ -393,6 +398,7 @@ static int hrtimer_fixup_free(void *addr, enum debug_obj_state state) static struct debug_obj_descr hrtimer_debug_descr = { .name = "hrtimer", + .debug_hint = hrtimer_debug_hint, .fixup_init = hrtimer_fixup_init, .fixup_activate = hrtimer_fixup_activate, .fixup_free = hrtimer_fixup_free, diff --git a/kernel/timer.c b/kernel/timer.c index d6459923d245..33a67925d900 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -404,6 +404,11 @@ static void timer_stats_account_timer(struct timer_list *timer) {} static struct debug_obj_descr timer_debug_descr; +static void *timer_debug_hint(void *addr) +{ + return ((struct timer_list *) addr)->function; +} + /* * fixup_init is called when: * - an active object is initialized @@ -477,6 +482,7 @@ static int timer_fixup_free(void *addr, enum debug_obj_state state) static struct debug_obj_descr timer_debug_descr = { .name = "timer_list", + .debug_hint = timer_debug_hint, .fixup_init = timer_fixup_init, .fixup_activate = timer_fixup_activate, .fixup_free = timer_fixup_free, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ee6578b578ad..b5fe4c00eb3c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -316,6 +316,11 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, static struct debug_obj_descr work_debug_descr; +static void *work_debug_hint(void *addr) +{ + return ((struct work_struct *) addr)->func; +} + /* * fixup_init is called when: * - an active object is initialized @@ -387,6 +392,7 @@ static int work_fixup_free(void *addr, enum debug_obj_state state) static struct debug_obj_descr work_debug_descr = { .name = "work_struct", + .debug_hint = work_debug_hint, .fixup_init = work_fixup_init, .fixup_activate = work_fixup_activate, .fixup_free = work_fixup_free, diff --git a/lib/debugobjects.c b/lib/debugobjects.c index deebcc57d4e6..9d86e45086f5 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -249,14 +249,17 @@ static struct debug_bucket *get_bucket(unsigned long addr) static void debug_print_object(struct debug_obj *obj, char *msg) { + struct debug_obj_descr *descr = obj->descr; static int limit; - if (limit < 5 && obj->descr != descr_test) { + if (limit < 5 && descr != descr_test) { + void *hint = descr->debug_hint ? + descr->debug_hint(obj->object) : NULL; limit++; WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " - "object type: %s\n", + "object type: %s hint: %pS\n", msg, obj_states[obj->state], obj->astate, - obj->descr->name); + descr->name, hint); } debug_objects_warnings++; } -- cgit From 94dcf29a11b3d20a28790598d701f98484a969da Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 22 Mar 2011 16:30:45 -0700 Subject: kthread: use kthread_create_on_node() ksoftirqd, kworker, migration, and pktgend kthreads can be created with kthread_create_on_node(), to get proper NUMA affinities for their stack and task_struct. Signed-off-by: Eric Dumazet Acked-by: David S. Miller Reviewed-by: Andi Kleen Acked-by: Rusty Russell Acked-by: Tejun Heo Cc: Tony Luck Cc: Fenghua Yu Cc: David Howells Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/softirq.c | 5 ++++- kernel/stop_machine.c | 6 ++++-- kernel/workqueue.c | 6 ++++-- net/core/pktgen.c | 5 ++++- 4 files changed, 16 insertions(+), 6 deletions(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/softirq.c b/kernel/softirq.c index 56e5dec837f0..735d87095172 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -845,7 +845,10 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); + p = kthread_create_on_node(run_ksoftirqd, + hcpu, + cpu_to_node(hotcpu), + "ksoftirqd/%d", hotcpu); if (IS_ERR(p)) { printk("ksoftirqd for %i failed\n", hotcpu); return notifier_from_errno(PTR_ERR(p)); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2df820b03beb..e3516b29076c 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -301,8 +301,10 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, case CPU_UP_PREPARE: BUG_ON(stopper->thread || stopper->enabled || !list_empty(&stopper->works)); - p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", - cpu); + p = kthread_create_on_node(cpu_stopper_thread, + stopper, + cpu_to_node(cpu), + "migration/%d", cpu); if (IS_ERR(p)) return notifier_from_errno(PTR_ERR(p)); get_task_struct(p); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5ca7ce9ce754..04ef830690ec 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1366,8 +1366,10 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind) worker->id = id; if (!on_unbound_cpu) - worker->task = kthread_create(worker_thread, worker, - "kworker/%u:%d", gcwq->cpu, id); + worker->task = kthread_create_on_node(worker_thread, + worker, + cpu_to_node(gcwq->cpu), + "kworker/%u:%d", gcwq->cpu, id); else worker->task = kthread_create(worker_thread, worker, "kworker/u:%d", id); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 0c55eaa70e39..aeeece72b72f 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3761,7 +3761,10 @@ static int __init pktgen_create_thread(int cpu) list_add_tail(&t->th_list, &pktgen_threads); init_completion(&t->start_done); - p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); + p = kthread_create_on_node(pktgen_thread_worker, + t, + cpu_to_node(cpu), + "kpktgend_%d", cpu); if (IS_ERR(p)) { pr_err("kernel_thread() failed for cpu %d\n", t->cpu); list_del(&t->th_list); -- cgit From 5035b20fa5cd146b66f5f89619c20a4177fb736d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 29 Apr 2011 18:08:37 +0200 Subject: workqueue: fix deadlock in worker_maybe_bind_and_lock() If a rescuer and stop_machine() bringing down a CPU race with each other, they may deadlock on non-preemptive kernel. The CPU won't accept a new task, so the rescuer can't migrate to the target CPU, while stop_machine() can't proceed because the rescuer is holding one of the CPU retrying migration. GCWQ_DISASSOCIATED is never cleared and worker_maybe_bind_and_lock() retries indefinitely. This problem can be reproduced semi reliably while the system is entering suspend. http://thread.gmane.org/gmane.linux.kernel/1122051 A lot of kudos to Thilo-Alexander for reporting this tricky issue and painstaking testing. stable: This affects all kernels with cmwq, so all kernels since and including v2.6.36 need this fix. Signed-off-by: Tejun Heo Reported-by: Thilo-Alexander Ginkel Tested-by: Thilo-Alexander Ginkel Cc: stable@kernel.org --- kernel/workqueue.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/workqueue.c') diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 04ef830690ec..e3378e8d3a5c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1291,8 +1291,14 @@ __acquires(&gcwq->lock) return true; spin_unlock_irq(&gcwq->lock); - /* CPU has come up inbetween, retry migration */ + /* + * We've raced with CPU hot[un]plug. Give it a breather + * and retry migration. cond_resched() is required here; + * otherwise, we might deadlock against cpu_stop trying to + * bring down the CPU on non-preemptive kernel. + */ cpu_relax(); + cond_resched(); } } -- cgit