aboutsummaryrefslogtreecommitdiff
path: root/kernel/locking
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/locking')
-rw-r--r--kernel/locking/Makefile1
-rw-r--r--kernel/locking/lockdep.c526
-rw-r--r--kernel/locking/lockdep_internals.h6
-rw-r--r--kernel/locking/locktorture.c81
-rw-r--r--kernel/locking/mutex-debug.h17
-rw-r--r--kernel/locking/mutex.c550
-rw-r--r--kernel/locking/mutex.h4
-rw-r--r--kernel/locking/percpu-rwsem.c7
-rw-r--r--kernel/locking/qrwlock.c1
-rw-r--r--kernel/locking/qspinlock.c1
-rw-r--r--kernel/locking/qspinlock_paravirt.h5
-rw-r--r--kernel/locking/qspinlock_stat.h1
-rw-r--r--kernel/locking/rtmutex-debug.c23
-rw-r--r--kernel/locking/rtmutex-debug.h5
-rw-r--r--kernel/locking/rtmutex.c452
-rw-r--r--kernel/locking/rtmutex.h4
-rw-r--r--kernel/locking/rtmutex_common.h26
-rw-r--r--kernel/locking/rwsem-spinlock.c37
-rw-r--r--kernel/locking/rwsem-xadd.c18
-rw-r--r--kernel/locking/rwsem.c7
-rw-r--r--kernel/locking/semaphore.c8
-rw-r--r--kernel/locking/spinlock.c8
-rw-r--r--kernel/locking/spinlock_debug.c86
-rw-r--r--kernel/locking/test-ww_mutex.c645
24 files changed, 1757 insertions, 762 deletions
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 6f88e352cd4f..760158d9d98d 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -28,3 +28,4 @@ obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
+obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 7c38f8f3d97b..7d2499bec5fe 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -28,6 +28,9 @@
#define DISABLE_BRANCH_PROFILING
#include <linux/mutex.h>
#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/task.h>
+#include <linux/sched/mm.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
@@ -658,6 +661,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
struct lockdep_subclass_key *key;
struct hlist_head *hash_head;
struct lock_class *class;
+ bool is_static = false;
if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
debug_locks_off();
@@ -671,10 +675,23 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
/*
* Static locks do not have their class-keys yet - for them the key
- * is the lock object itself:
+ * is the lock object itself. If the lock is in the per cpu area,
+ * the canonical address of the lock (per cpu offset removed) is
+ * used.
*/
- if (unlikely(!lock->key))
- lock->key = (void *)lock;
+ if (unlikely(!lock->key)) {
+ unsigned long can_addr, addr = (unsigned long)lock;
+
+ if (__is_kernel_percpu_address(addr, &can_addr))
+ lock->key = (void *)can_addr;
+ else if (__is_module_percpu_address(addr, &can_addr))
+ lock->key = (void *)can_addr;
+ else if (static_obj(lock))
+ lock->key = (void *)lock;
+ else
+ return ERR_PTR(-EINVAL);
+ is_static = true;
+ }
/*
* NOTE: the class-key must be unique. For dynamic locks, a static
@@ -706,7 +723,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
}
}
- return NULL;
+ return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL);
}
/*
@@ -724,19 +741,18 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
DEBUG_LOCKS_WARN_ON(!irqs_disabled());
class = look_up_lock_class(lock, subclass);
- if (likely(class))
+ if (likely(!IS_ERR_OR_NULL(class)))
goto out_set_class_cache;
/*
* Debug-check: all keys must be persistent!
- */
- if (!static_obj(lock->key)) {
+ */
+ if (IS_ERR(class)) {
debug_locks_off();
printk("INFO: trying to register non-static key.\n");
printk("the code is fine but needs lockdep annotation.\n");
printk("turning off the locking correctness validator.\n");
dump_stack();
-
return NULL;
}
@@ -1141,18 +1157,18 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
if (debug_locks_silent)
return 0;
- printk("\n");
- printk("======================================================\n");
- printk("[ INFO: possible circular locking dependency detected ]\n");
+ pr_warn("\n");
+ pr_warn("======================================================\n");
+ pr_warn("WARNING: possible circular locking dependency detected\n");
print_kernel_ident();
- printk("-------------------------------------------------------\n");
- printk("%s/%d is trying to acquire lock:\n",
+ pr_warn("------------------------------------------------------\n");
+ pr_warn("%s/%d is trying to acquire lock:\n",
curr->comm, task_pid_nr(curr));
print_lock(check_src);
- printk("\nbut task is already holding lock:\n");
+ pr_warn("\nbut task is already holding lock:\n");
print_lock(check_tgt);
- printk("\nwhich lock already depends on the new lock.\n\n");
- printk("\nthe existing dependency chain (in reverse order) is:\n");
+ pr_warn("\nwhich lock already depends on the new lock.\n\n");
+ pr_warn("\nthe existing dependency chain (in reverse order) is:\n");
print_circular_bug_entry(entry, depth);
@@ -1479,13 +1495,13 @@ print_bad_irq_dependency(struct task_struct *curr,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
- printk("\n");
- printk("======================================================\n");
- printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
+ pr_warn("\n");
+ pr_warn("=====================================================\n");
+ pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n",
irqclass, irqclass);
print_kernel_ident();
- printk("------------------------------------------------------\n");
- printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
+ pr_warn("-----------------------------------------------------\n");
+ pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
curr->comm, task_pid_nr(curr),
curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
@@ -1493,46 +1509,46 @@ print_bad_irq_dependency(struct task_struct *curr,
curr->softirqs_enabled);
print_lock(next);
- printk("\nand this task is already holding:\n");
+ pr_warn("\nand this task is already holding:\n");
print_lock(prev);
- printk("which would create a new lock dependency:\n");
+ pr_warn("which would create a new lock dependency:\n");
print_lock_name(hlock_class(prev));
- printk(KERN_CONT " ->");
+ pr_cont(" ->");
print_lock_name(hlock_class(next));
- printk(KERN_CONT "\n");
+ pr_cont("\n");
- printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
+ pr_warn("\nbut this new dependency connects a %s-irq-safe lock:\n",
irqclass);
print_lock_name(backwards_entry->class);
- printk("\n... which became %s-irq-safe at:\n", irqclass);
+ pr_warn("\n... which became %s-irq-safe at:\n", irqclass);
print_stack_trace(backwards_entry->class->usage_traces + bit1, 1);
- printk("\nto a %s-irq-unsafe lock:\n", irqclass);
+ pr_warn("\nto a %s-irq-unsafe lock:\n", irqclass);
print_lock_name(forwards_entry->class);
- printk("\n... which became %s-irq-unsafe at:\n", irqclass);
- printk("...");
+ pr_warn("\n... which became %s-irq-unsafe at:\n", irqclass);
+ pr_warn("...");
print_stack_trace(forwards_entry->class->usage_traces + bit2, 1);
- printk("\nother info that might help us debug this:\n\n");
+ pr_warn("\nother info that might help us debug this:\n\n");
print_irq_lock_scenario(backwards_entry, forwards_entry,
hlock_class(prev), hlock_class(next));
lockdep_print_held_locks(curr);
- printk("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
+ pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
if (!save_trace(&prev_root->trace))
return 0;
print_shortest_lock_dependencies(backwards_entry, prev_root);
- printk("\nthe dependencies between the lock to be acquired");
- printk(" and %s-irq-unsafe lock:\n", irqclass);
+ pr_warn("\nthe dependencies between the lock to be acquired");
+ pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
if (!save_trace(&next_root->trace))
return 0;
print_shortest_lock_dependencies(forwards_entry, next_root);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -1708,22 +1724,22 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
- printk("\n");
- printk("=============================================\n");
- printk("[ INFO: possible recursive locking detected ]\n");
+ pr_warn("\n");
+ pr_warn("============================================\n");
+ pr_warn("WARNING: possible recursive locking detected\n");
print_kernel_ident();
- printk("---------------------------------------------\n");
- printk("%s/%d is trying to acquire lock:\n",
+ pr_warn("--------------------------------------------\n");
+ pr_warn("%s/%d is trying to acquire lock:\n",
curr->comm, task_pid_nr(curr));
print_lock(next);
- printk("\nbut task is already holding lock:\n");
+ pr_warn("\nbut task is already holding lock:\n");
print_lock(prev);
- printk("\nother info that might help us debug this:\n");
+ pr_warn("\nother info that might help us debug this:\n");
print_deadlock_scenario(next, prev);
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -2058,21 +2074,21 @@ static void print_collision(struct task_struct *curr,
struct held_lock *hlock_next,
struct lock_chain *chain)
{
- printk("\n");
- printk("======================\n");
- printk("[chain_key collision ]\n");
+ pr_warn("\n");
+ pr_warn("============================\n");
+ pr_warn("WARNING: chain_key collision\n");
print_kernel_ident();
- printk("----------------------\n");
- printk("%s/%d: ", current->comm, task_pid_nr(current));
- printk("Hash chain already cached but the contents don't match!\n");
+ pr_warn("----------------------------\n");
+ pr_warn("%s/%d: ", current->comm, task_pid_nr(current));
+ pr_warn("Hash chain already cached but the contents don't match!\n");
- printk("Held locks:");
+ pr_warn("Held locks:");
print_chain_keys_held_locks(curr, hlock_next);
- printk("Locks in cached chain:");
+ pr_warn("Locks in cached chain:");
print_chain_keys_chain(chain);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
}
#endif
@@ -2203,7 +2219,7 @@ cache_hit:
* Important for check_no_collision().
*/
if (unlikely(nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)) {
- if (debug_locks_off_graph_unlock())
+ if (!debug_locks_off_graph_unlock())
return 0;
print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!");
@@ -2357,16 +2373,16 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
- printk("\n");
- printk("=================================\n");
- printk("[ INFO: inconsistent lock state ]\n");
+ pr_warn("\n");
+ pr_warn("================================\n");
+ pr_warn("WARNING: inconsistent lock state\n");
print_kernel_ident();
- printk("---------------------------------\n");
+ pr_warn("--------------------------------\n");
- printk("inconsistent {%s} -> {%s} usage.\n",
+ pr_warn("inconsistent {%s} -> {%s} usage.\n",
usage_str[prev_bit], usage_str[new_bit]);
- printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
+ pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
curr->comm, task_pid_nr(curr),
trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
@@ -2374,16 +2390,16 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
trace_softirqs_enabled(curr));
print_lock(this);
- printk("{%s} state was registered at:\n", usage_str[prev_bit]);
+ pr_warn("{%s} state was registered at:\n", usage_str[prev_bit]);
print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1);
print_irqtrace_events(curr);
- printk("\nother info that might help us debug this:\n");
+ pr_warn("\nother info that might help us debug this:\n");
print_usage_bug_scenario(this);
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -2422,28 +2438,28 @@ print_irq_inversion_bug(struct task_struct *curr,
if (!debug_locks_off_graph_unlock() || debug_locks_silent)
return 0;
- printk("\n");
- printk("=========================================================\n");
- printk("[ INFO: possible irq lock inversion dependency detected ]\n");
+ pr_warn("\n");
+ pr_warn("========================================================\n");
+ pr_warn("WARNING: possible irq lock inversion dependency detected\n");
print_kernel_ident();
- printk("---------------------------------------------------------\n");
- printk("%s/%d just changed the state of lock:\n",
+ pr_warn("--------------------------------------------------------\n");
+ pr_warn("%s/%d just changed the state of lock:\n",
curr->comm, task_pid_nr(curr));
print_lock(this);
if (forwards)
- printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
+ pr_warn("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
else
- printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
+ pr_warn("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
print_lock_name(other->class);
- printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
+ pr_warn("\n\nand interrupts could create inverse lock ordering between them.\n\n");
- printk("\nother info that might help us debug this:\n");
+ pr_warn("\nother info that might help us debug this:\n");
/* Find a middle lock (if one exists) */
depth = get_lock_depth(other);
do {
if (depth == 0 && (entry != root)) {
- printk("lockdep:%s bad path found in chain graph\n", __func__);
+ pr_warn("lockdep:%s bad path found in chain graph\n", __func__);
break;
}
middle = entry;
@@ -2459,12 +2475,12 @@ print_irq_inversion_bug(struct task_struct *curr,
lockdep_print_held_locks(curr);
- printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
+ pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
if (!save_trace(&root->trace))
return 0;
print_shortest_lock_dependencies(other, root);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -2861,6 +2877,8 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
if (unlikely(!debug_locks))
return;
+ gfp_mask = current_gfp_context(gfp_mask);
+
/* no reclaim without waiting on it */
if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
return;
@@ -2870,7 +2888,7 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
return;
/* We're only interested __GFP_FS allocations for now */
- if (!(gfp_mask & __GFP_FS))
+ if (!(gfp_mask & __GFP_FS) || (curr->flags & PF_MEMALLOC_NOFS))
return;
/*
@@ -2879,6 +2897,10 @@ static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)))
return;
+ /* Disable lockdep if explicitly requested */
+ if (gfp_mask & __GFP_NOLOCKDEP)
+ return;
+
mark_held_locks(curr, RECLAIM_FS);
}
@@ -3167,25 +3189,25 @@ print_lock_nested_lock_not_held(struct task_struct *curr,
if (debug_locks_silent)
return 0;
- printk("\n");
- printk("==================================\n");
- printk("[ BUG: Nested lock was not taken ]\n");
+ pr_warn("\n");
+ pr_warn("==================================\n");
+ pr_warn("WARNING: Nested lock was not taken\n");
print_kernel_ident();
- printk("----------------------------------\n");
+ pr_warn("----------------------------------\n");
- printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
+ pr_warn("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
print_lock(hlock);
- printk("\nbut this task is not holding:\n");
- printk("%s\n", hlock->nest_lock->name);
+ pr_warn("\nbut this task is not holding:\n");
+ pr_warn("%s\n", hlock->nest_lock->name);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
- printk("\nother info that might help us debug this:\n");
+ pr_warn("\nother info that might help us debug this:\n");
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -3260,10 +3282,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (depth) {
hlock = curr->held_locks + depth - 1;
if (hlock->class_idx == class_idx && nest_lock) {
- if (hlock->references)
+ if (hlock->references) {
+ /*
+ * Check: unsigned int references:12, overflow.
+ */
+ if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1))
+ return 0;
+
hlock->references++;
- else
+ } else {
hlock->references = 2;
+ }
return 1;
}
@@ -3373,21 +3402,21 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
if (debug_locks_silent)
return 0;
- printk("\n");
- printk("=====================================\n");
- printk("[ BUG: bad unlock balance detected! ]\n");
+ pr_warn("\n");
+ pr_warn("=====================================\n");
+ pr_warn("WARNING: bad unlock balance detected!\n");
print_kernel_ident();
- printk("-------------------------------------\n");
- printk("%s/%d is trying to release lock (",
+ pr_warn("-------------------------------------\n");
+ pr_warn("%s/%d is trying to release lock (",
curr->comm, task_pid_nr(curr));
print_lockdep_cache(lock);
- printk(KERN_CONT ") at:\n");
+ pr_cont(") at:\n");
print_ip_sym(ip);
- printk("but there are no more locks to release!\n");
- printk("\nother info that might help us debug this:\n");
+ pr_warn("but there are no more locks to release!\n");
+ pr_warn("\nother info that might help us debug this:\n");
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -3410,7 +3439,7 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
* Clearly if the lock hasn't been acquired _ever_, we're not
* holding it either, so report failure.
*/
- if (!class)
+ if (IS_ERR_OR_NULL(class))
return 0;
/*
@@ -3428,13 +3457,67 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock)
return 0;
}
+/* @depth must not be zero */
+static struct held_lock *find_held_lock(struct task_struct *curr,
+ struct lockdep_map *lock,
+ unsigned int depth, int *idx)
+{
+ struct held_lock *ret, *hlock, *prev_hlock;
+ int i;
+
+ i = depth - 1;
+ hlock = curr->held_locks + i;
+ ret = hlock;
+ if (match_held_lock(hlock, lock))
+ goto out;
+
+ ret = NULL;
+ for (i--, prev_hlock = hlock--;
+ i >= 0;
+ i--, prev_hlock = hlock--) {
+ /*
+ * We must not cross into another context:
+ */
+ if (prev_hlock->irq_context != hlock->irq_context) {
+ ret = NULL;
+ break;
+ }
+ if (match_held_lock(hlock, lock)) {
+ ret = hlock;
+ break;
+ }
+ }
+
+out:
+ *idx = i;
+ return ret;
+}
+
+static int reacquire_held_locks(struct task_struct *curr, unsigned int depth,
+ int idx)
+{
+ struct held_lock *hlock;
+
+ for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) {
+ if (!__lock_acquire(hlock->instance,
+ hlock_class(hlock)->subclass,
+ hlock->trylock,
+ hlock->read, hlock->check,
+ hlock->hardirqs_off,
+ hlock->nest_lock, hlock->acquire_ip,
+ hlock->references, hlock->pin_count))
+ return 1;
+ }
+ return 0;
+}
+
static int
__lock_set_class(struct lockdep_map *lock, const char *name,
struct lock_class_key *key, unsigned int subclass,
unsigned long ip)
{
struct task_struct *curr = current;
- struct held_lock *hlock, *prev_hlock;
+ struct held_lock *hlock;
struct lock_class *class;
unsigned int depth;
int i;
@@ -3447,21 +3530,10 @@ __lock_set_class(struct lockdep_map *lock, const char *name,
if (DEBUG_LOCKS_WARN_ON(!depth))
return 0;
- prev_hlock = NULL;
- for (i = depth-1; i >= 0; i--) {
- hlock = curr->held_locks + i;
- /*
- * We must not cross into another context:
- */
- if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
- break;
- if (match_held_lock(hlock, lock))
- goto found_it;
- prev_hlock = hlock;
- }
- return print_unlock_imbalance_bug(curr, lock, ip);
+ hlock = find_held_lock(curr, lock, depth, &i);
+ if (!hlock)
+ return print_unlock_imbalance_bug(curr, lock, ip);
-found_it:
lockdep_init_map(lock, name, key, 0);
class = register_lock_class(lock, subclass, 0);
hlock->class_idx = class - lock_classes + 1;
@@ -3469,15 +3541,46 @@ found_it:
curr->lockdep_depth = i;
curr->curr_chain_key = hlock->prev_chain_key;
- for (; i < depth; i++) {
- hlock = curr->held_locks + i;
- if (!__lock_acquire(hlock->instance,
- hlock_class(hlock)->subclass, hlock->trylock,
- hlock->read, hlock->check, hlock->hardirqs_off,
- hlock->nest_lock, hlock->acquire_ip,
- hlock->references, hlock->pin_count))
- return 0;
- }
+ if (reacquire_held_locks(curr, depth, i))
+ return 0;
+
+ /*
+ * I took it apart and put it back together again, except now I have
+ * these 'spare' parts.. where shall I put them.
+ */
+ if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth))
+ return 0;
+ return 1;
+}
+
+static int __lock_downgrade(struct lockdep_map *lock, unsigned long ip)
+{
+ struct task_struct *curr = current;
+ struct held_lock *hlock;
+ unsigned int depth;
+ int i;
+
+ depth = curr->lockdep_depth;
+ /*
+ * This function is about (re)setting the class of a held lock,
+ * yet we're not actually holding any locks. Naughty user!
+ */
+ if (DEBUG_LOCKS_WARN_ON(!depth))
+ return 0;
+
+ hlock = find_held_lock(curr, lock, depth, &i);
+ if (!hlock)
+ return print_unlock_imbalance_bug(curr, lock, ip);
+
+ curr->lockdep_depth = i;
+ curr->curr_chain_key = hlock->prev_chain_key;
+
+ WARN(hlock->read, "downgrading a read lock");
+ hlock->read = 1;
+ hlock->acquire_ip = ip;
+
+ if (reacquire_held_locks(curr, depth, i))
+ return 0;
/*
* I took it apart and put it back together again, except now I have
@@ -3499,7 +3602,7 @@ static int
__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
{
struct task_struct *curr = current;
- struct held_lock *hlock, *prev_hlock;
+ struct held_lock *hlock;
unsigned int depth;
int i;
@@ -3518,21 +3621,10 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
* Check whether the lock exists in the current stack
* of held locks:
*/
- prev_hlock = NULL;
- for (i = depth-1; i >= 0; i--) {
- hlock = curr->held_locks + i;
- /*
- * We must not cross into another context:
- */
- if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
- break;
- if (match_held_lock(hlock, lock))
- goto found_it;
- prev_hlock = hlock;
- }
- return print_unlock_imbalance_bug(curr, lock, ip);
+ hlock = find_held_lock(curr, lock, depth, &i);
+ if (!hlock)
+ return print_unlock_imbalance_bug(curr, lock, ip);
-found_it:
if (hlock->instance == lock)
lock_release_holdtime(hlock);
@@ -3559,15 +3651,8 @@ found_it:
curr->lockdep_depth = i;
curr->curr_chain_key = hlock->prev_chain_key;
- for (i++; i < depth; i++) {
- hlock = curr->held_locks + i;
- if (!__lock_acquire(hlock->instance,
- hlock_class(hlock)->subclass, hlock->trylock,
- hlock->read, hlock->check, hlock->hardirqs_off,
- hlock->nest_lock, hlock->acquire_ip,
- hlock->references, hlock->pin_count))
- return 0;
- }
+ if (reacquire_held_locks(curr, depth, i + 1))
+ return 0;
/*
* We had N bottles of beer on the wall, we drank one, but now
@@ -3732,6 +3817,23 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
}
EXPORT_SYMBOL_GPL(lock_set_class);
+void lock_downgrade(struct lockdep_map *lock, unsigned long ip)
+{
+ unsigned long flags;
+
+ if (unlikely(current->lockdep_recursion))
+ return;
+
+ raw_local_irq_save(flags);
+ current->lockdep_recursion = 1;
+ check_flags(flags);
+ if (__lock_downgrade(lock, ip))
+ check_chain_key(current);
+ current->lockdep_recursion = 0;
+ raw_local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(lock_downgrade);
+
/*
* We are not always called with irqs disabled - do that here,
* and also avoid lockdep recursion:
@@ -3852,13 +3954,15 @@ EXPORT_SYMBOL_GPL(lock_unpin_lock);
void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
{
- current->lockdep_reclaim_gfp = gfp_mask;
+ current->lockdep_reclaim_gfp = current_gfp_context(gfp_mask);
}
+EXPORT_SYMBOL_GPL(lockdep_set_current_reclaim_state);
void lockdep_clear_current_reclaim_state(void)
{
current->lockdep_reclaim_gfp = 0;
}
+EXPORT_SYMBOL_GPL(lockdep_clear_current_reclaim_state);
#ifdef CONFIG_LOCK_STAT
static int
@@ -3870,21 +3974,21 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
if (debug_locks_silent)
return 0;
- printk("\n");
- printk("=================================\n");
- printk("[ BUG: bad contention detected! ]\n");
+ pr_warn("\n");
+ pr_warn("=================================\n");
+ pr_warn("WARNING: bad contention detected!\n");
print_kernel_ident();
- printk("---------------------------------\n");
- printk("%s/%d is trying to contend lock (",
+ pr_warn("---------------------------------\n");
+ pr_warn("%s/%d is trying to contend lock (",
curr->comm, task_pid_nr(curr));
print_lockdep_cache(lock);
- printk(KERN_CONT ") at:\n");
+ pr_cont(") at:\n");
print_ip_sym(ip);
- printk("but there are no locks held!\n");
- printk("\nother info that might help us debug this:\n");
+ pr_warn("but there are no locks held!\n");
+ pr_warn("\nother info that might help us debug this:\n");
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
return 0;
@@ -3894,7 +3998,7 @@ static void
__lock_contended(struct lockdep_map *lock, unsigned long ip)
{
struct task_struct *curr = current;
- struct held_lock *hlock, *prev_hlock;
+ struct held_lock *hlock;
struct lock_class_stats *stats;
unsigned int depth;
int i, contention_point, contending_point;
@@ -3907,22 +4011,12 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!depth))
return;
- prev_hlock = NULL;
- for (i = depth-1; i >= 0; i--) {
- hlock = curr->held_locks + i;
- /*
- * We must not cross into another context:
- */
- if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
- break;
- if (match_held_lock(hlock, lock))
- goto found_it;
- prev_hlock = hlock;
+ hlock = find_held_lock(curr, lock, depth, &i);
+ if (!hlock) {
+ print_lock_contention_bug(curr, lock, ip);
+ return;
}
- print_lock_contention_bug(curr, lock, ip);
- return;
-found_it:
if (hlock->instance != lock)
return;
@@ -3946,7 +4040,7 @@ static void
__lock_acquired(struct lockdep_map *lock, unsigned long ip)
{
struct task_struct *curr = current;
- struct held_lock *hlock, *prev_hlock;
+ struct held_lock *hlock;
struct lock_class_stats *stats;
unsigned int depth;
u64 now, waittime = 0;
@@ -3960,22 +4054,12 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip)
if (DEBUG_LOCKS_WARN_ON(!depth))
return;
- prev_hlock = NULL;
- for (i = depth-1; i >= 0; i--) {
- hlock = curr->held_locks + i;
- /*
- * We must not cross into another context:
- */
- if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
- break;
- if (match_held_lock(hlock, lock))
- goto found_it;
- prev_hlock = hlock;
+ hlock = find_held_lock(curr, lock, depth, &i);
+ if (!hlock) {
+ print_lock_contention_bug(curr, lock, _RET_IP_);
+ return;
}
- print_lock_contention_bug(curr, lock, _RET_IP_);
- return;
-found_it:
if (hlock->instance != lock)
return;
@@ -4163,7 +4247,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
* If the class exists we look it up and zap it:
*/
class = look_up_lock_class(lock, j);
- if (class)
+ if (!IS_ERR_OR_NULL(class))
zap_class(class);
}
/*
@@ -4234,17 +4318,17 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
if (debug_locks_silent)
return;
- printk("\n");
- printk("=========================\n");
- printk("[ BUG: held lock freed! ]\n");
+ pr_warn("\n");
+ pr_warn("=========================\n");
+ pr_warn("WARNING: held lock freed!\n");
print_kernel_ident();
- printk("-------------------------\n");
- printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
+ pr_warn("-------------------------\n");
+ pr_warn("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
print_lock(hlock);
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
}
@@ -4292,14 +4376,14 @@ static void print_held_locks_bug(void)
if (debug_locks_silent)
return;
- printk("\n");
- printk("=====================================\n");
- printk("[ BUG: %s/%d still has locks held! ]\n",
+ pr_warn("\n");
+ pr_warn("====================================\n");
+ pr_warn("WARNING: %s/%d still has locks held!\n",
current->comm, task_pid_nr(current));
print_kernel_ident();
- printk("-------------------------------------\n");
+ pr_warn("------------------------------------\n");
lockdep_print_held_locks(current);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
}
@@ -4318,10 +4402,10 @@ void debug_show_all_locks(void)
int unlock = 1;
if (unlikely(!debug_locks)) {
- printk("INFO: lockdep is turned off.\n");
+ pr_warn("INFO: lockdep is turned off.\n");
return;
}
- printk("\nShowing all locks held in the system:\n");
+ pr_warn("\nShowing all locks held in the system:\n");
/*
* Here we try to get the tasklist_lock as hard as possible,
@@ -4332,18 +4416,18 @@ void debug_show_all_locks(void)
retry:
if (!read_trylock(&tasklist_lock)) {
if (count == 10)
- printk("hm, tasklist_lock locked, retrying... ");
+ pr_warn("hm, tasklist_lock locked, retrying... ");
if (count) {
count--;
- printk(" #%d", 10-count);
+ pr_cont(" #%d", 10-count);
mdelay(200);
goto retry;
}
- printk(" ignoring it.\n");
+ pr_cont(" ignoring it.\n");
unlock = 0;
} else {
if (count != 10)
- printk(KERN_CONT " locked it.\n");
+ pr_cont(" locked it.\n");
}
do_each_thread(g, p) {
@@ -4361,8 +4445,8 @@ retry:
unlock = 1;
} while_each_thread(g, p);
- printk("\n");
- printk("=============================================\n\n");
+ pr_warn("\n");
+ pr_warn("=============================================\n\n");
if (unlock)
read_unlock(&tasklist_lock);
@@ -4391,12 +4475,12 @@ asmlinkage __visible void lockdep_sys_exit(void)
if (unlikely(curr->lockdep_depth)) {
if (!debug_locks_off())
return;
- printk("\n");
- printk("================================================\n");
- printk("[ BUG: lock held when returning to user space! ]\n");
+ pr_warn("\n");
+ pr_warn("================================================\n");
+ pr_warn("WARNING: lock held when returning to user space!\n");
print_kernel_ident();
- printk("------------------------------------------------\n");
- printk("%s/%d is leaving the kernel with locks still held!\n",
+ pr_warn("------------------------------------------------\n");
+ pr_warn("%s/%d is leaving the kernel with locks still held!\n",
curr->comm, curr->pid);
lockdep_print_held_locks(curr);
}
@@ -4406,19 +4490,15 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
{
struct task_struct *curr = current;
-#ifndef CONFIG_PROVE_RCU_REPEATEDLY
- if (!debug_locks_off())
- return;
-#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
/* Note: the following can be executed concurrently, so be careful. */
- printk("\n");
- printk("===============================\n");
- printk("[ INFO: suspicious RCU usage. ]\n");
+ pr_warn("\n");
+ pr_warn("=============================\n");
+ pr_warn("WARNING: suspicious RCU usage\n");
print_kernel_ident();
- printk("-------------------------------\n");
- printk("%s:%d %s!\n", file, line, s);
- printk("\nother info that might help us debug this:\n\n");
- printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
+ pr_warn("-----------------------------\n");
+ pr_warn("%s:%d %s!\n", file, line, s);
+ pr_warn("\nother info that might help us debug this:\n\n");
+ pr_warn("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
!rcu_lockdep_current_cpu_online()
? "RCU used illegally from offline CPU!\n"
: !rcu_is_watching()
@@ -4445,10 +4525,10 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
* rcu_read_lock_bh() and so on from extended quiescent states.
*/
if (!rcu_is_watching())
- printk("RCU used illegally from extended quiescent state!\n");
+ pr_warn("RCU used illegally from extended quiescent state!\n");
lockdep_print_held_locks(curr);
- printk("\nstack backtrace:\n");
+ pr_warn("\nstack backtrace:\n");
dump_stack();
}
EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index c2b88490d857..c08fbd2f5ba9 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -46,13 +46,13 @@ enum {
(LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
/*
- * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text,
+ * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text,
* .data and .bss to fit in required 32MB limit for the kernel. With
- * PROVE_LOCKING we could go over this limit and cause system boot-up problems.
+ * CONFIG_LOCKDEP we could go over this limit and cause system boot-up problems.
* So, reduce the static allocations for lockdeps related structures so that
* everything fits in current required size limit.
*/
-#ifdef CONFIG_PROVE_LOCKING_SMALL
+#ifdef CONFIG_LOCKDEP_SMALL
/*
* MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
* we track.
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index f8c5af52a131..f24582d4dad3 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -32,6 +32,8 @@
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
+#include <uapi/linux/sched/types.h>
+#include <linux/rtmutex.h>
#include <linux/atomic.h>
#include <linux/moduleparam.h>
#include <linux/delay.h>
@@ -372,6 +374,78 @@ static struct lock_torture_ops mutex_lock_ops = {
.name = "mutex_lock"
};
+#include <linux/ww_mutex.h>
+static DEFINE_WW_CLASS(torture_ww_class);
+static DEFINE_WW_MUTEX(torture_ww_mutex_0, &torture_ww_class);
+static DEFINE_WW_MUTEX(torture_ww_mutex_1, &torture_ww_class);
+static DEFINE_WW_MUTEX(torture_ww_mutex_2, &torture_ww_class);
+
+static int torture_ww_mutex_lock(void)
+__acquires(torture_ww_mutex_0)
+__acquires(torture_ww_mutex_1)
+__acquires(torture_ww_mutex_2)
+{
+ LIST_HEAD(list);
+ struct reorder_lock {
+ struct list_head link;
+ struct ww_mutex *lock;
+ } locks[3], *ll, *ln;
+ struct ww_acquire_ctx ctx;
+
+ locks[0].lock = &torture_ww_mutex_0;
+ list_add(&locks[0].link, &list);
+
+ locks[1].lock = &torture_ww_mutex_1;
+ list_add(&locks[1].link, &list);
+
+ locks[2].lock = &torture_ww_mutex_2;
+ list_add(&locks[2].link, &list);
+
+ ww_acquire_init(&ctx, &torture_ww_class);
+
+ list_for_each_entry(ll, &list, link) {
+ int err;
+
+ err = ww_mutex_lock(ll->lock, &ctx);
+ if (!err)
+ continue;
+
+ ln = ll;
+ list_for_each_entry_continue_reverse(ln, &list, link)
+ ww_mutex_unlock(ln->lock);
+
+ if (err != -EDEADLK)
+ return err;
+
+ ww_mutex_lock_slow(ll->lock, &ctx);
+ list_move(&ll->link, &list);
+ }
+
+ ww_acquire_fini(&ctx);
+ return 0;
+}
+
+static void torture_ww_mutex_unlock(void)
+__releases(torture_ww_mutex_0)
+__releases(torture_ww_mutex_1)
+__releases(torture_ww_mutex_2)
+{
+ ww_mutex_unlock(&torture_ww_mutex_0);
+ ww_mutex_unlock(&torture_ww_mutex_1);
+ ww_mutex_unlock(&torture_ww_mutex_2);
+}
+
+static struct lock_torture_ops ww_mutex_lock_ops = {
+ .writelock = torture_ww_mutex_lock,
+ .write_delay = torture_mutex_delay,
+ .task_boost = torture_boost_dummy,
+ .writeunlock = torture_ww_mutex_unlock,
+ .readlock = NULL,
+ .read_delay = NULL,
+ .readunlock = NULL,
+ .name = "ww_mutex_lock"
+};
+
#ifdef CONFIG_RT_MUTEXES
static DEFINE_RT_MUTEX(torture_rtmutex);
@@ -780,6 +854,10 @@ static void lock_torture_cleanup(void)
else
lock_torture_print_module_parms(cxt.cur_ops,
"End of test: SUCCESS");
+
+ kfree(cxt.lwsa);
+ kfree(cxt.lrsa);
+
end:
torture_cleanup_end();
}
@@ -793,6 +871,7 @@ static int __init lock_torture_init(void)
&spin_lock_ops, &spin_lock_irq_ops,
&rw_lock_ops, &rw_lock_irq_ops,
&mutex_lock_ops,
+ &ww_mutex_lock_ops,
#ifdef CONFIG_RT_MUTEXES
&rtmutex_lock_ops,
#endif
@@ -924,6 +1003,8 @@ static int __init lock_torture_init(void)
GFP_KERNEL);
if (reader_tasks == NULL) {
VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
+ kfree(writer_tasks);
+ writer_tasks = NULL;
firsterr = -ENOMEM;
goto unwind;
}
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h
index a459faa48987..4174417d5309 100644
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -26,20 +26,3 @@ extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
extern void debug_mutex_unlock(struct mutex *lock);
extern void debug_mutex_init(struct mutex *lock, const char *name,
struct lock_class_key *key);
-
-#define spin_lock_mutex(lock, flags) \
- do { \
- struct mutex *l = container_of(lock, struct mutex, wait_lock); \
- \
- DEBUG_LOCKS_WARN_ON(in_interrupt()); \
- local_irq_save(flags); \
- arch_spin_lock(&(lock)->rlock.raw_lock);\
- DEBUG_LOCKS_WARN_ON(l->magic != l); \
- } while (0)
-
-#define spin_unlock_mutex(lock, flags) \
- do { \
- arch_spin_unlock(&(lock)->rlock.raw_lock); \
- local_irq_restore(flags); \
- preempt_check_resched(); \
- } while (0)
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 9b349619f431..858a07590e39 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -19,8 +19,10 @@
*/
#include <linux/mutex.h>
#include <linux/ww_mutex.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
+#include <linux/sched/wake_q.h>
+#include <linux/sched/debug.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
@@ -50,16 +52,17 @@ EXPORT_SYMBOL(__mutex_init);
/*
* @owner: contains: 'struct task_struct *' to the current lock owner,
* NULL means not owned. Since task_struct pointers are aligned at
- * ARCH_MIN_TASKALIGN (which is at least sizeof(void *)), we have low
- * bits to store extra state.
+ * at least L1_CACHE_BYTES, we have low bits to store extra state.
*
* Bit0 indicates a non-empty waiter list; unlock must issue a wakeup.
* Bit1 indicates unlock needs to hand the lock to the top-waiter
+ * Bit2 indicates handoff has been done and we're waiting for pickup.
*/
#define MUTEX_FLAG_WAITERS 0x01
#define MUTEX_FLAG_HANDOFF 0x02
+#define MUTEX_FLAG_PICKUP 0x04
-#define MUTEX_FLAGS 0x03
+#define MUTEX_FLAGS 0x07
static inline struct task_struct *__owner_task(unsigned long owner)
{
@@ -72,38 +75,29 @@ static inline unsigned long __owner_flags(unsigned long owner)
}
/*
- * Actual trylock that will work on any unlocked state.
- *
- * When setting the owner field, we must preserve the low flag bits.
- *
- * Be careful with @handoff, only set that in a wait-loop (where you set
- * HANDOFF) to avoid recursive lock attempts.
+ * Trylock variant that retuns the owning task on failure.
*/
-static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
+static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock)
{
unsigned long owner, curr = (unsigned long)current;
owner = atomic_long_read(&lock->owner);
for (;;) { /* must loop, can race against a flag */
unsigned long old, flags = __owner_flags(owner);
+ unsigned long task = owner & ~MUTEX_FLAGS;
- if (__owner_task(owner)) {
- if (handoff && unlikely(__owner_task(owner) == current)) {
- /*
- * Provide ACQUIRE semantics for the lock-handoff.
- *
- * We cannot easily use load-acquire here, since
- * the actual load is a failed cmpxchg, which
- * doesn't imply any barriers.
- *
- * Also, this is a fairly unlikely scenario, and
- * this contains the cost.
- */
- smp_mb(); /* ACQUIRE */
- return true;
- }
+ if (task) {
+ if (likely(task != curr))
+ break;
- return false;
+ if (likely(!(flags & MUTEX_FLAG_PICKUP)))
+ break;
+
+ flags &= ~MUTEX_FLAG_PICKUP;
+ } else {
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP);
+#endif
}
/*
@@ -111,15 +105,24 @@ static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
* past the point where we acquire it. This would be possible
* if we (accidentally) set the bit on an unlocked mutex.
*/
- if (handoff)
- flags &= ~MUTEX_FLAG_HANDOFF;
+ flags &= ~MUTEX_FLAG_HANDOFF;
old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags);
if (old == owner)
- return true;
+ return NULL;
owner = old;
}
+
+ return __owner_task(owner);
+}
+
+/*
+ * Actual trylock that will work on any unlocked state.
+ */
+static inline bool __mutex_trylock(struct mutex *lock)
+{
+ return !__mutex_trylock_or_owner(lock);
}
#ifndef CONFIG_DEBUG_LOCK_ALLOC
@@ -171,9 +174,9 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait
/*
* Give up ownership to a specific task, when @task = NULL, this is equivalent
- * to a regular unlock. Clears HANDOFF, preserves WAITERS. Provides RELEASE
- * semantics like a regular unlock, the __mutex_trylock() provides matching
- * ACQUIRE semantics for the handoff.
+ * to a regular unlock. Sets PICKUP on a handoff, clears HANDOF, preserves
+ * WAITERS. Provides RELEASE semantics like a regular unlock, the
+ * __mutex_trylock() provides a matching ACQUIRE semantics for the handoff.
*/
static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
{
@@ -184,10 +187,13 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+ DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
#endif
new = (owner & MUTEX_FLAG_WAITERS);
new |= (unsigned long)task;
+ if (task)
+ new |= MUTEX_FLAG_PICKUP;
old = atomic_long_cmpxchg_release(&lock->owner, owner, new);
if (old == owner)
@@ -221,9 +227,9 @@ static void __sched __mutex_lock_slowpath(struct mutex *lock);
* (or statically defined) before it can be locked. memset()-ing
* the mutex to 0 is not allowed.
*
- * ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging
- * checks that will enforce the restrictions and will also do
- * deadlock debugging. )
+ * (The CONFIG_DEBUG_MUTEXES .config option turns on debugging
+ * checks that will enforce the restrictions and will also do
+ * deadlock debugging)
*
* This function is similar to (but not equivalent to) down().
*/
@@ -237,8 +243,8 @@ void __sched mutex_lock(struct mutex *lock)
EXPORT_SYMBOL(mutex_lock);
#endif
-static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
- struct ww_acquire_ctx *ww_ctx)
+static __always_inline void
+ww_mutex_lock_acquired(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
{
#ifdef CONFIG_DEBUG_MUTEXES
/*
@@ -277,17 +283,50 @@ static __always_inline void ww_mutex_lock_acquired(struct ww_mutex *ww,
ww_ctx->acquired++;
}
+static inline bool __sched
+__ww_ctx_stamp_after(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
+{
+ return a->stamp - b->stamp <= LONG_MAX &&
+ (a->stamp != b->stamp || a > b);
+}
+
+/*
+ * Wake up any waiters that may have to back off when the lock is held by the
+ * given context.
+ *
+ * Due to the invariants on the wait list, this can only affect the first
+ * waiter with a context.
+ *
+ * The current task must not be on the wait list.
+ */
+static void __sched
+__ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)
+{
+ struct mutex_waiter *cur;
+
+ lockdep_assert_held(&lock->wait_lock);
+
+ list_for_each_entry(cur, &lock->wait_list, list) {
+ if (!cur->ww_ctx)
+ continue;
+
+ if (cur->ww_ctx->acquired > 0 &&
+ __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) {
+ debug_mutex_wake_waiter(lock, cur);
+ wake_up_process(cur->task);
+ }
+
+ break;
+ }
+}
+
/*
* After acquiring lock with fastpath or when we lost out in contested
* slowpath, set ctx and wake up any waiters so they can recheck.
*/
static __always_inline void
-ww_mutex_set_context_fastpath(struct ww_mutex *lock,
- struct ww_acquire_ctx *ctx)
+ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
- unsigned long flags;
- struct mutex_waiter *cur;
-
ww_mutex_lock_acquired(lock, ctx);
lock->ctx = ctx;
@@ -311,46 +350,79 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock,
* Uh oh, we raced in fastpath, wake up everyone in this case,
* so they can see the new lock->ctx.
*/
- spin_lock_mutex(&lock->base.wait_lock, flags);
- list_for_each_entry(cur, &lock->base.wait_list, list) {
- debug_mutex_wake_waiter(&lock->base, cur);
- wake_up_process(cur->task);
- }
- spin_unlock_mutex(&lock->base.wait_lock, flags);
+ spin_lock(&lock->base.wait_lock);
+ __ww_mutex_wakeup_for_backoff(&lock->base, ctx);
+ spin_unlock(&lock->base.wait_lock);
}
/*
- * After acquiring lock in the slowpath set ctx and wake up any
- * waiters so they can recheck.
+ * After acquiring lock in the slowpath set ctx.
+ *
+ * Unlike for the fast path, the caller ensures that waiters are woken up where
+ * necessary.
*
* Callers must hold the mutex wait_lock.
*/
static __always_inline void
-ww_mutex_set_context_slowpath(struct ww_mutex *lock,
- struct ww_acquire_ctx *ctx)
+ww_mutex_set_context_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
- struct mutex_waiter *cur;
-
ww_mutex_lock_acquired(lock, ctx);
lock->ctx = ctx;
+}
+
+#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+
+static inline
+bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
+ struct mutex_waiter *waiter)
+{
+ struct ww_mutex *ww;
+
+ ww = container_of(lock, struct ww_mutex, base);
/*
- * Give any possible sleeping processes the chance to wake up,
- * so they can recheck if they have to back off.
+ * If ww->ctx is set the contents are undefined, only
+ * by acquiring wait_lock there is a guarantee that
+ * they are not invalid when reading.
+ *
+ * As such, when deadlock detection needs to be
+ * performed the optimistic spinning cannot be done.
+ *
+ * Check this in every inner iteration because we may
+ * be racing against another thread's ww_mutex_lock.
*/
- list_for_each_entry(cur, &lock->base.wait_list, list) {
- debug_mutex_wake_waiter(&lock->base, cur);
- wake_up_process(cur->task);
- }
+ if (ww_ctx->acquired > 0 && READ_ONCE(ww->ctx))
+ return false;
+
+ /*
+ * If we aren't on the wait list yet, cancel the spin
+ * if there are waiters. We want to avoid stealing the
+ * lock from a waiter with an earlier stamp, since the
+ * other thread may already own a lock that we also
+ * need.
+ */
+ if (!waiter && (atomic_long_read(&lock->owner) & MUTEX_FLAG_WAITERS))
+ return false;
+
+ /*
+ * Similarly, stop spinning if we are no longer the
+ * first waiter.
+ */
+ if (waiter && !__mutex_waiter_is_first(lock, waiter))
+ return false;
+
+ return true;
}
-#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
- * Look out! "owner" is an entirely speculative pointer
- * access and not reliable.
+ * Look out! "owner" is an entirely speculative pointer access and not
+ * reliable.
+ *
+ * "noinline" so that this function shows up on perf profiles.
*/
static noinline
-bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
+ struct ww_acquire_ctx *ww_ctx, struct mutex_waiter *waiter)
{
bool ret = true;
@@ -373,6 +445,11 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
break;
}
+ if (ww_ctx && !ww_mutex_spin_on_owner(lock, ww_ctx, waiter)) {
+ ret = false;
+ break;
+ }
+
cpu_relax();
}
rcu_read_unlock();
@@ -431,12 +508,10 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
* with the spinner at the head of the OSQ, if present, until the owner is
* changed to itself.
*/
-static bool mutex_optimistic_spin(struct mutex *lock,
- struct ww_acquire_ctx *ww_ctx,
- const bool use_ww_ctx, const bool waiter)
+static __always_inline bool
+mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
+ const bool use_ww_ctx, struct mutex_waiter *waiter)
{
- struct task_struct *task = current;
-
if (!waiter) {
/*
* The purpose of the mutex_can_spin_on_owner() function is
@@ -460,40 +535,17 @@ static bool mutex_optimistic_spin(struct mutex *lock,
for (;;) {
struct task_struct *owner;
- if (use_ww_ctx && ww_ctx->acquired > 0) {
- struct ww_mutex *ww;
-
- ww = container_of(lock, struct ww_mutex, base);
- /*
- * If ww->ctx is set the contents are undefined, only
- * by acquiring wait_lock there is a guarantee that
- * they are not invalid when reading.
- *
- * As such, when deadlock detection needs to be
- * performed the optimistic spinning cannot be done.
- */
- if (READ_ONCE(ww->ctx))
- goto fail_unlock;
- }
+ /* Try to acquire the mutex... */
+ owner = __mutex_trylock_or_owner(lock);
+ if (!owner)
+ break;
/*
- * If there's an owner, wait for it to either
+ * There's an owner, wait for it to either
* release the lock or go to sleep.
*/
- owner = __mutex_owner(lock);
- if (owner) {
- if (waiter && owner == task) {
- smp_mb(); /* ACQUIRE */
- break;
- }
-
- if (!mutex_spin_on_owner(lock, owner))
- goto fail_unlock;
- }
-
- /* Try to acquire the mutex if it is unlocked. */
- if (__mutex_trylock(lock, waiter))
- break;
+ if (!mutex_spin_on_owner(lock, owner, ww_ctx, waiter))
+ goto fail_unlock;
/*
* The cpu_relax() call is a compiler barrier which forces
@@ -532,9 +584,9 @@ fail:
return false;
}
#else
-static bool mutex_optimistic_spin(struct mutex *lock,
- struct ww_acquire_ctx *ww_ctx,
- const bool use_ww_ctx, const bool waiter)
+static __always_inline bool
+mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
+ const bool use_ww_ctx, struct mutex_waiter *waiter)
{
return false;
}
@@ -594,23 +646,88 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock)
EXPORT_SYMBOL(ww_mutex_unlock);
static inline int __sched
-__ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
+__ww_mutex_lock_check_stamp(struct mutex *lock, struct mutex_waiter *waiter,
+ struct ww_acquire_ctx *ctx)
{
struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
+ struct mutex_waiter *cur;
+
+ if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx))
+ goto deadlock;
+
+ /*
+ * If there is a waiter in front of us that has a context, then its
+ * stamp is earlier than ours and we must back off.
+ */
+ cur = waiter;
+ list_for_each_entry_continue_reverse(cur, &lock->wait_list, list) {
+ if (cur->ww_ctx)
+ goto deadlock;
+ }
+
+ return 0;
- if (!hold_ctx)
+deadlock:
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
+ ctx->contending_lock = ww;
+#endif
+ return -EDEADLK;
+}
+
+static inline int __sched
+__ww_mutex_add_waiter(struct mutex_waiter *waiter,
+ struct mutex *lock,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ struct mutex_waiter *cur;
+ struct list_head *pos;
+
+ if (!ww_ctx) {
+ list_add_tail(&waiter->list, &lock->wait_list);
return 0;
+ }
- if (ctx->stamp - hold_ctx->stamp <= LONG_MAX &&
- (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) {
+ /*
+ * Add the waiter before the first waiter with a higher stamp.
+ * Waiters without a context are skipped to avoid starving
+ * them.
+ */
+ pos = &lock->wait_list;
+ list_for_each_entry_reverse(cur, &lock->wait_list, list) {
+ if (!cur->ww_ctx)
+ continue;
+
+ if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {
+ /* Back off immediately if necessary. */
+ if (ww_ctx->acquired > 0) {
#ifdef CONFIG_DEBUG_MUTEXES
- DEBUG_LOCKS_WARN_ON(ctx->contending_lock);
- ctx->contending_lock = ww;
+ struct ww_mutex *ww;
+
+ ww = container_of(lock, struct ww_mutex, base);
+ DEBUG_LOCKS_WARN_ON(ww_ctx->contending_lock);
+ ww_ctx->contending_lock = ww;
#endif
- return -EDEADLK;
+ return -EDEADLK;
+ }
+
+ break;
+ }
+
+ pos = &cur->list;
+
+ /*
+ * Wake up the waiter so that it gets a chance to back
+ * off.
+ */
+ if (cur->ww_ctx->acquired > 0) {
+ debug_mutex_wake_waiter(lock, cur);
+ wake_up_process(cur->task);
+ }
}
+ list_add_tail(&waiter->list, pos);
return 0;
}
@@ -622,15 +739,15 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
struct lockdep_map *nest_lock, unsigned long ip,
struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx)
{
- struct task_struct *task = current;
struct mutex_waiter waiter;
- unsigned long flags;
bool first = false;
struct ww_mutex *ww;
int ret;
- if (use_ww_ctx) {
- ww = container_of(lock, struct ww_mutex, base);
+ might_sleep();
+
+ ww = container_of(lock, struct ww_mutex, base);
+ if (use_ww_ctx && ww_ctx) {
if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
return -EALREADY;
}
@@ -638,36 +755,54 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
preempt_disable();
mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
- if (__mutex_trylock(lock, false) ||
- mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, false)) {
+ if (__mutex_trylock(lock) ||
+ mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, NULL)) {
/* got the lock, yay! */
lock_acquired(&lock->dep_map, ip);
- if (use_ww_ctx)
+ if (use_ww_ctx && ww_ctx)
ww_mutex_set_context_fastpath(ww, ww_ctx);
preempt_enable();
return 0;
}
- spin_lock_mutex(&lock->wait_lock, flags);
+ spin_lock(&lock->wait_lock);
/*
* After waiting to acquire the wait_lock, try again.
*/
- if (__mutex_trylock(lock, false))
+ if (__mutex_trylock(lock)) {
+ if (use_ww_ctx && ww_ctx)
+ __ww_mutex_wakeup_for_backoff(lock, ww_ctx);
+
goto skip_wait;
+ }
debug_mutex_lock_common(lock, &waiter);
- debug_mutex_add_waiter(lock, &waiter, task);
+ debug_mutex_add_waiter(lock, &waiter, current);
+
+ lock_contended(&lock->dep_map, ip);
- /* add waiting tasks to the end of the waitqueue (FIFO): */
- list_add_tail(&waiter.list, &lock->wait_list);
- waiter.task = task;
+ if (!use_ww_ctx) {
+ /* add waiting tasks to the end of the waitqueue (FIFO): */
+ list_add_tail(&waiter.list, &lock->wait_list);
+
+#ifdef CONFIG_DEBUG_MUTEXES
+ waiter.ww_ctx = MUTEX_POISON_WW_CTX;
+#endif
+ } else {
+ /* Add in stamp order, waking up waiters that must back off. */
+ ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx);
+ if (ret)
+ goto err_early_backoff;
+
+ waiter.ww_ctx = ww_ctx;
+ }
+
+ waiter.task = current;
if (__mutex_waiter_is_first(lock, &waiter))
__mutex_set_flag(lock, MUTEX_FLAG_WAITERS);
- lock_contended(&lock->dep_map, ip);
-
- set_task_state(task, state);
+ set_current_state(state);
for (;;) {
/*
* Once we hold wait_lock, we're serialized against
@@ -675,7 +810,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* before testing the error conditions to make sure we pick up
* the handoff.
*/
- if (__mutex_trylock(lock, first))
+ if (__mutex_trylock(lock))
goto acquired;
/*
@@ -683,42 +818,47 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
* wait_lock. This ensures the lock cancellation is ordered
* against mutex_unlock() and wake-ups do not go missing.
*/
- if (unlikely(signal_pending_state(state, task))) {
+ if (unlikely(signal_pending_state(state, current))) {
ret = -EINTR;
goto err;
}
- if (use_ww_ctx && ww_ctx->acquired > 0) {
- ret = __ww_mutex_lock_check_stamp(lock, ww_ctx);
+ if (use_ww_ctx && ww_ctx && ww_ctx->acquired > 0) {
+ ret = __ww_mutex_lock_check_stamp(lock, &waiter, ww_ctx);
if (ret)
goto err;
}
- spin_unlock_mutex(&lock->wait_lock, flags);
+ spin_unlock(&lock->wait_lock);
schedule_preempt_disabled();
- if (!first && __mutex_waiter_is_first(lock, &waiter)) {
- first = true;
- __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
+ /*
+ * ww_mutex needs to always recheck its position since its waiter
+ * list is not FIFO ordered.
+ */
+ if ((use_ww_ctx && ww_ctx) || !first) {
+ first = __mutex_waiter_is_first(lock, &waiter);
+ if (first)
+ __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
}
- set_task_state(task, state);
+ set_current_state(state);
/*
* Here we order against unlock; we must either see it change
* state back to RUNNING and fall through the next schedule(),
* or we must see its unlock and acquire.
*/
- if ((first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) ||
- __mutex_trylock(lock, first))
+ if (__mutex_trylock(lock) ||
+ (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, &waiter)))
break;
- spin_lock_mutex(&lock->wait_lock, flags);
+ spin_lock(&lock->wait_lock);
}
- spin_lock_mutex(&lock->wait_lock, flags);
+ spin_lock(&lock->wait_lock);
acquired:
- __set_task_state(task, TASK_RUNNING);
+ __set_current_state(TASK_RUNNING);
- mutex_remove_waiter(lock, &waiter, task);
+ mutex_remove_waiter(lock, &waiter, current);
if (likely(list_empty(&lock->wait_list)))
__mutex_clear_flag(lock, MUTEX_FLAGS);
@@ -728,30 +868,44 @@ skip_wait:
/* got the lock - cleanup and rejoice! */
lock_acquired(&lock->dep_map, ip);
- if (use_ww_ctx)
+ if (use_ww_ctx && ww_ctx)
ww_mutex_set_context_slowpath(ww, ww_ctx);
- spin_unlock_mutex(&lock->wait_lock, flags);
+ spin_unlock(&lock->wait_lock);
preempt_enable();
return 0;
err:
- __set_task_state(task, TASK_RUNNING);
- mutex_remove_waiter(lock, &waiter, task);
- spin_unlock_mutex(&lock->wait_lock, flags);
+ __set_current_state(TASK_RUNNING);
+ mutex_remove_waiter(lock, &waiter, current);
+err_early_backoff:
+ spin_unlock(&lock->wait_lock);
debug_mutex_free_waiter(&waiter);
mutex_release(&lock->dep_map, 1, ip);
preempt_enable();
return ret;
}
+static int __sched
+__mutex_lock(struct mutex *lock, long state, unsigned int subclass,
+ struct lockdep_map *nest_lock, unsigned long ip)
+{
+ return __mutex_lock_common(lock, state, subclass, nest_lock, ip, NULL, false);
+}
+
+static int __sched
+__ww_mutex_lock(struct mutex *lock, long state, unsigned int subclass,
+ struct lockdep_map *nest_lock, unsigned long ip,
+ struct ww_acquire_ctx *ww_ctx)
+{
+ return __mutex_lock_common(lock, state, subclass, nest_lock, ip, ww_ctx, true);
+}
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void __sched
mutex_lock_nested(struct mutex *lock, unsigned int subclass)
{
- might_sleep();
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
- subclass, NULL, _RET_IP_, NULL, 0);
+ __mutex_lock(lock, TASK_UNINTERRUPTIBLE, subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_nested);
@@ -759,30 +913,38 @@ EXPORT_SYMBOL_GPL(mutex_lock_nested);
void __sched
_mutex_lock_nest_lock(struct mutex *lock, struct lockdep_map *nest)
{
- might_sleep();
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
- 0, nest, _RET_IP_, NULL, 0);
+ __mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, nest, _RET_IP_);
}
EXPORT_SYMBOL_GPL(_mutex_lock_nest_lock);
int __sched
mutex_lock_killable_nested(struct mutex *lock, unsigned int subclass)
{
- might_sleep();
- return __mutex_lock_common(lock, TASK_KILLABLE,
- subclass, NULL, _RET_IP_, NULL, 0);
+ return __mutex_lock(lock, TASK_KILLABLE, subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_killable_nested);
int __sched
mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass)
{
- might_sleep();
- return __mutex_lock_common(lock, TASK_INTERRUPTIBLE,
- subclass, NULL, _RET_IP_, NULL, 0);
+ return __mutex_lock(lock, TASK_INTERRUPTIBLE, subclass, NULL, _RET_IP_);
}
EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested);
+void __sched
+mutex_lock_io_nested(struct mutex *lock, unsigned int subclass)
+{
+ int token;
+
+ might_sleep();
+
+ token = io_schedule_prepare();
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE,
+ subclass, NULL, _RET_IP_, NULL, 0);
+ io_schedule_finish(token);
+}
+EXPORT_SYMBOL_GPL(mutex_lock_io_nested);
+
static inline int
ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
@@ -810,35 +972,37 @@ ww_mutex_deadlock_injection(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
}
int __sched
-__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
might_sleep();
- ret = __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE,
- 0, &ctx->dep_map, _RET_IP_, ctx, 1);
- if (!ret && ctx->acquired > 1)
+ ret = __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE,
+ 0, ctx ? &ctx->dep_map : NULL, _RET_IP_,
+ ctx);
+ if (!ret && ctx && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
return ret;
}
-EXPORT_SYMBOL_GPL(__ww_mutex_lock);
+EXPORT_SYMBOL_GPL(ww_mutex_lock);
int __sched
-__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
int ret;
might_sleep();
- ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE,
- 0, &ctx->dep_map, _RET_IP_, ctx, 1);
+ ret = __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE,
+ 0, ctx ? &ctx->dep_map : NULL, _RET_IP_,
+ ctx);
- if (!ret && ctx->acquired > 1)
+ if (!ret && ctx && ctx->acquired > 1)
return ww_mutex_deadlock_injection(lock, ctx);
return ret;
}
-EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
+EXPORT_SYMBOL_GPL(ww_mutex_lock_interruptible);
#endif
@@ -848,8 +1012,8 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock_interruptible);
static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigned long ip)
{
struct task_struct *next = NULL;
- unsigned long owner, flags;
DEFINE_WAKE_Q(wake_q);
+ unsigned long owner;
mutex_release(&lock->dep_map, 1, ip);
@@ -866,6 +1030,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+ DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
#endif
if (owner & MUTEX_FLAG_HANDOFF)
@@ -883,7 +1048,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
owner = old;
}
- spin_lock_mutex(&lock->wait_lock, flags);
+ spin_lock(&lock->wait_lock);
debug_mutex_unlock(lock);
if (!list_empty(&lock->wait_list)) {
/* get the first entry from the wait-list: */
@@ -900,7 +1065,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
if (owner & MUTEX_FLAG_HANDOFF)
__mutex_handoff(lock, next);
- spin_unlock_mutex(&lock->wait_lock, flags);
+ spin_unlock(&lock->wait_lock);
wake_up_q(&wake_q);
}
@@ -950,40 +1115,47 @@ int __sched mutex_lock_killable(struct mutex *lock)
}
EXPORT_SYMBOL(mutex_lock_killable);
+void __sched mutex_lock_io(struct mutex *lock)
+{
+ int token;
+
+ token = io_schedule_prepare();
+ mutex_lock(lock);
+ io_schedule_finish(token);
+}
+EXPORT_SYMBOL_GPL(mutex_lock_io);
+
static noinline void __sched
__mutex_lock_slowpath(struct mutex *lock)
{
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0,
- NULL, _RET_IP_, NULL, 0);
+ __mutex_lock(lock, TASK_UNINTERRUPTIBLE, 0, NULL, _RET_IP_);
}
static noinline int __sched
__mutex_lock_killable_slowpath(struct mutex *lock)
{
- return __mutex_lock_common(lock, TASK_KILLABLE, 0,
- NULL, _RET_IP_, NULL, 0);
+ return __mutex_lock(lock, TASK_KILLABLE, 0, NULL, _RET_IP_);
}
static noinline int __sched
__mutex_lock_interruptible_slowpath(struct mutex *lock)
{
- return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0,
- NULL, _RET_IP_, NULL, 0);
+ return __mutex_lock(lock, TASK_INTERRUPTIBLE, 0, NULL, _RET_IP_);
}
static noinline int __sched
__ww_mutex_lock_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
- return __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE, 0,
- NULL, _RET_IP_, ctx, 1);
+ return __ww_mutex_lock(&lock->base, TASK_UNINTERRUPTIBLE, 0, NULL,
+ _RET_IP_, ctx);
}
static noinline int __sched
__ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
struct ww_acquire_ctx *ctx)
{
- return __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE, 0,
- NULL, _RET_IP_, ctx, 1);
+ return __ww_mutex_lock(&lock->base, TASK_INTERRUPTIBLE, 0, NULL,
+ _RET_IP_, ctx);
}
#endif
@@ -1004,7 +1176,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
*/
int __sched mutex_trylock(struct mutex *lock)
{
- bool locked = __mutex_trylock(lock, false);
+ bool locked = __mutex_trylock(lock);
if (locked)
mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
@@ -1015,32 +1187,34 @@ EXPORT_SYMBOL(mutex_trylock);
#ifndef CONFIG_DEBUG_LOCK_ALLOC
int __sched
-__ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
might_sleep();
if (__mutex_trylock_fast(&lock->base)) {
- ww_mutex_set_context_fastpath(lock, ctx);
+ if (ctx)
+ ww_mutex_set_context_fastpath(lock, ctx);
return 0;
}
return __ww_mutex_lock_slowpath(lock, ctx);
}
-EXPORT_SYMBOL(__ww_mutex_lock);
+EXPORT_SYMBOL(ww_mutex_lock);
int __sched
-__ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
+ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
{
might_sleep();
if (__mutex_trylock_fast(&lock->base)) {
- ww_mutex_set_context_fastpath(lock, ctx);
+ if (ctx)
+ ww_mutex_set_context_fastpath(lock, ctx);
return 0;
}
return __ww_mutex_lock_interruptible_slowpath(lock, ctx);
}
-EXPORT_SYMBOL(__ww_mutex_lock_interruptible);
+EXPORT_SYMBOL(ww_mutex_lock_interruptible);
#endif
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 4410a4af42a3..6ebc1902f779 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -9,10 +9,6 @@
* !CONFIG_DEBUG_MUTEXES case. Most of them are NOPs:
*/
-#define spin_lock_mutex(lock, flags) \
- do { spin_lock(lock); (void)(flags); } while (0)
-#define spin_unlock_mutex(lock, flags) \
- do { spin_unlock(lock); (void)(flags); } while (0)
#define mutex_remove_waiter(lock, waiter, task) \
__list_del((waiter)->list.prev, (waiter)->list.next)
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index ce182599cf2e..883cf1b92d90 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -1,7 +1,6 @@
#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
-#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
@@ -18,7 +17,7 @@ int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
__init_rwsem(&sem->rw_sem, name, rwsem_key);
- init_waitqueue_head(&sem->writer);
+ rcuwait_init(&sem->writer);
sem->readers_block = 0;
return 0;
}
@@ -103,7 +102,7 @@ void __percpu_up_read(struct percpu_rw_semaphore *sem)
__this_cpu_dec(*sem->read_count);
/* Prod writer to recheck readers_active */
- wake_up(&sem->writer);
+ rcuwait_wake_up(&sem->writer);
}
EXPORT_SYMBOL_GPL(__percpu_up_read);
@@ -160,7 +159,7 @@ void percpu_down_write(struct percpu_rw_semaphore *sem)
*/
/* Wait for all now active readers to complete. */
- wait_event(sem->writer, readers_active_check(sem));
+ rcuwait_wait_event(&sem->writer, readers_active_check(sem));
}
EXPORT_SYMBOL_GPL(percpu_down_write);
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index cc3ed0ccdfa2..2655f26ec882 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -20,6 +20,7 @@
#include <linux/cpumask.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
+#include <linux/spinlock.h>
#include <asm/qrwlock.h>
/*
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index b2caec7315af..fd24153e8a48 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -28,6 +28,7 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/mutex.h>
+#include <linux/prefetch.h>
#include <asm/byteorder.h>
#include <asm/qspinlock.h>
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index e3b5520005db..4ccfcaae5b89 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -193,7 +193,8 @@ void __init __pv_init_lock_hash(void)
*/
pv_lock_hash = alloc_large_system_hash("PV qspinlock",
sizeof(struct pv_hash_entry),
- pv_hash_size, 0, HASH_EARLY,
+ pv_hash_size, 0,
+ HASH_EARLY | HASH_ZERO,
&pv_lock_hash_bits, NULL,
pv_hash_size, pv_hash_size);
}
@@ -263,7 +264,7 @@ pv_wait_early(struct pv_node *prev, int loop)
if ((loop & PV_PREV_CHECK_MASK) != 0)
return false;
- return READ_ONCE(prev->state) != vcpu_running;
+ return READ_ONCE(prev->state) != vcpu_running || vcpu_is_preempted(prev->cpu);
}
/*
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index e852be4851fc..4a30ef63c607 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -63,6 +63,7 @@ enum qlock_stats {
*/
#include <linux/debugfs.h>
#include <linux/sched.h>
+#include <linux/sched/clock.h>
#include <linux/fs.h>
static const char * const qstat_names[qstat_num + 1] = {
diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c
index 62b6cee8ea7f..ac35e648b0e5 100644
--- a/kernel/locking/rtmutex-debug.c
+++ b/kernel/locking/rtmutex-debug.c
@@ -18,6 +18,7 @@
*/
#include <linux/sched.h>
#include <linux/sched/rt.h>
+#include <linux/sched/debug.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/spinlock.h>
@@ -101,10 +102,11 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
return;
}
- printk("\n============================================\n");
- printk( "[ BUG: circular locking deadlock detected! ]\n");
- printk("%s\n", print_tainted());
- printk( "--------------------------------------------\n");
+ pr_warn("\n");
+ pr_warn("============================================\n");
+ pr_warn("WARNING: circular locking deadlock detected!\n");
+ pr_warn("%s\n", print_tainted());
+ pr_warn("--------------------------------------------\n");
printk("%s/%d is deadlocking current task %s/%d\n\n",
task->comm, task_pid_nr(task),
current->comm, task_pid_nr(current));
@@ -164,21 +166,16 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
memset(waiter, 0x22, sizeof(*waiter));
}
-void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
+void debug_rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key)
{
/*
* Make sure we are not reinitializing a held lock:
*/
debug_check_no_locks_freed((void *)lock, sizeof(*lock));
lock->name = name;
-}
-
-void
-rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
-{
-}
-void rt_mutex_deadlock_account_unlock(struct task_struct *task)
-{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
}
diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h
index d0519c3432b6..5078c6ddf4a5 100644
--- a/kernel/locking/rtmutex-debug.h
+++ b/kernel/locking/rtmutex-debug.h
@@ -9,12 +9,9 @@
* This file contains macros used solely by rtmutex.c. Debug version.
*/
-extern void
-rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
-extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
-extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
+extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key);
extern void debug_rt_mutex_lock(struct rt_mutex *lock);
extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 2f443ed2320a..649dc9d3951a 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -12,9 +12,11 @@
*/
#include <linux/spinlock.h>
#include <linux/export.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
+#include <linux/sched/wake_q.h>
+#include <linux/sched/debug.h>
#include <linux/timer.h>
#include "rtmutex_common.h"
@@ -222,6 +224,12 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
}
#endif
+/*
+ * Only use with rt_mutex_waiter_{less,equal}()
+ */
+#define task_to_waiter(p) \
+ &(struct rt_mutex_waiter){ .prio = (p)->prio, .deadline = (p)->dl.deadline }
+
static inline int
rt_mutex_waiter_less(struct rt_mutex_waiter *left,
struct rt_mutex_waiter *right)
@@ -236,12 +244,30 @@ rt_mutex_waiter_less(struct rt_mutex_waiter *left,
* then right waiter has a dl_prio() too.
*/
if (dl_prio(left->prio))
- return dl_time_before(left->task->dl.deadline,
- right->task->dl.deadline);
+ return dl_time_before(left->deadline, right->deadline);
return 0;
}
+static inline int
+rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
+ struct rt_mutex_waiter *right)
+{
+ if (left->prio != right->prio)
+ return 0;
+
+ /*
+ * If both waiters have dl_prio(), we check the deadlines of the
+ * associated tasks.
+ * If left waiter has a dl_prio(), and we didn't return 0 above,
+ * then right waiter has a dl_prio() too.
+ */
+ if (dl_prio(left->prio))
+ return left->deadline == right->deadline;
+
+ return 1;
+}
+
static void
rt_mutex_enqueue(struct rt_mutex *lock, struct rt_mutex_waiter *waiter)
{
@@ -320,72 +346,16 @@ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
RB_CLEAR_NODE(&waiter->pi_tree_entry);
}
-/*
- * Calculate task priority from the waiter tree priority
- *
- * Return task->normal_prio when the waiter tree is empty or when
- * the waiter is not allowed to do priority boosting
- */
-int rt_mutex_getprio(struct task_struct *task)
-{
- if (likely(!task_has_pi_waiters(task)))
- return task->normal_prio;
-
- return min(task_top_pi_waiter(task)->prio,
- task->normal_prio);
-}
-
-struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
-{
- if (likely(!task_has_pi_waiters(task)))
- return NULL;
-
- return task_top_pi_waiter(task)->task;
-}
-
-/*
- * Called by sched_setscheduler() to get the priority which will be
- * effective after the change.
- */
-int rt_mutex_get_effective_prio(struct task_struct *task, int newprio)
+static void rt_mutex_adjust_prio(struct task_struct *p)
{
- if (!task_has_pi_waiters(task))
- return newprio;
-
- if (task_top_pi_waiter(task)->task->prio <= newprio)
- return task_top_pi_waiter(task)->task->prio;
- return newprio;
-}
+ struct task_struct *pi_task = NULL;
-/*
- * Adjust the priority of a task, after its pi_waiters got modified.
- *
- * This can be both boosting and unboosting. task->pi_lock must be held.
- */
-static void __rt_mutex_adjust_prio(struct task_struct *task)
-{
- int prio = rt_mutex_getprio(task);
-
- if (task->prio != prio || dl_prio(prio))
- rt_mutex_setprio(task, prio);
-}
+ lockdep_assert_held(&p->pi_lock);
-/*
- * Adjust task priority (undo boosting). Called from the exit path of
- * rt_mutex_slowunlock() and rt_mutex_slowlock().
- *
- * (Note: We do this outside of the protection of lock->wait_lock to
- * allow the lock to be taken while or before we readjust the priority
- * of task. We do not use the spin_xx_mutex() variants here as we are
- * outside of the debug path.)
- */
-void rt_mutex_adjust_prio(struct task_struct *task)
-{
- unsigned long flags;
+ if (task_has_pi_waiters(p))
+ pi_task = task_top_pi_waiter(p)->task;
- raw_spin_lock_irqsave(&task->pi_lock, flags);
- __rt_mutex_adjust_prio(task);
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ rt_mutex_setprio(p, pi_task);
}
/*
@@ -608,7 +578,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* enabled we continue, but stop the requeueing in the chain
* walk.
*/
- if (waiter->prio == task->prio) {
+ if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
if (!detect_deadlock)
goto out_unlock_pi;
else
@@ -704,7 +674,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/* [7] Requeue the waiter in the lock waiter tree. */
rt_mutex_dequeue(lock, waiter);
+
+ /*
+ * Update the waiter prio fields now that we're dequeued.
+ *
+ * These values can have changed through either:
+ *
+ * sys_sched_set_scheduler() / sys_sched_setattr()
+ *
+ * or
+ *
+ * DL CBS enforcement advancing the effective deadline.
+ *
+ * Even though pi_waiters also uses these fields, and that tree is only
+ * updated in [11], we can do this here, since we hold [L], which
+ * serializes all pi_waiters access and rb_erase() does not care about
+ * the values of the node being removed.
+ */
waiter->prio = task->prio;
+ waiter->deadline = task->dl.deadline;
+
rt_mutex_enqueue(lock, waiter);
/* [8] Release the task */
@@ -745,7 +734,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
*/
rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
rt_mutex_enqueue_pi(task, waiter);
- __rt_mutex_adjust_prio(task);
+ rt_mutex_adjust_prio(task);
} else if (prerequeue_top_waiter == waiter) {
/*
@@ -761,7 +750,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
rt_mutex_dequeue_pi(task, waiter);
waiter = rt_mutex_top_waiter(lock);
rt_mutex_enqueue_pi(task, waiter);
- __rt_mutex_adjust_prio(task);
+ rt_mutex_adjust_prio(task);
} else {
/*
* Nothing changed. No need to do any priority
@@ -831,6 +820,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
struct rt_mutex_waiter *waiter)
{
+ lockdep_assert_held(&lock->wait_lock);
+
/*
* Before testing whether we can acquire @lock, we set the
* RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
@@ -890,7 +881,8 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
* the top waiter priority (kernel view),
* @task lost.
*/
- if (task->prio >= rt_mutex_top_waiter(lock)->prio)
+ if (!rt_mutex_waiter_less(task_to_waiter(task),
+ rt_mutex_top_waiter(lock)))
return 0;
/*
@@ -936,8 +928,6 @@ takeit:
*/
rt_mutex_set_owner(lock, task);
- rt_mutex_deadlock_account_lock(lock, task);
-
return 1;
}
@@ -958,6 +948,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
struct rt_mutex *next_lock;
int chain_walk = 0, res;
+ lockdep_assert_held(&lock->wait_lock);
+
/*
* Early deadlock detection. We really don't want the task to
* enqueue on itself just to untangle the mess later. It's not
@@ -971,10 +963,10 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
return -EDEADLK;
raw_spin_lock(&task->pi_lock);
- __rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
waiter->prio = task->prio;
+ waiter->deadline = task->dl.deadline;
/* Get the top priority waiter on the lock */
if (rt_mutex_has_waiters(lock))
@@ -993,7 +985,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
rt_mutex_dequeue_pi(owner, top_waiter);
rt_mutex_enqueue_pi(owner, waiter);
- __rt_mutex_adjust_prio(owner);
+ rt_mutex_adjust_prio(owner);
if (owner->pi_blocked_on)
chain_walk = 1;
} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
@@ -1045,12 +1037,14 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
waiter = rt_mutex_top_waiter(lock);
/*
- * Remove it from current->pi_waiters. We do not adjust a
- * possible priority boost right now. We execute wakeup in the
- * boosted mode and go back to normal after releasing
- * lock->wait_lock.
+ * Remove it from current->pi_waiters and deboost.
+ *
+ * We must in fact deboost here in order to ensure we call
+ * rt_mutex_setprio() to update p->pi_top_task before the
+ * task unblocks.
*/
rt_mutex_dequeue_pi(current, waiter);
+ rt_mutex_adjust_prio(current);
/*
* As we are waking up the top waiter, and the waiter stays
@@ -1062,9 +1056,19 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
*/
lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
- raw_spin_unlock(&current->pi_lock);
-
+ /*
+ * We deboosted before waking the top waiter task such that we don't
+ * run two tasks with the 'same' priority (and ensure the
+ * p->pi_top_task pointer points to a blocked task). This however can
+ * lead to priority inversion if we would get preempted after the
+ * deboost but before waking our donor task, hence the preempt_disable()
+ * before unlock.
+ *
+ * Pairs with preempt_enable() in rt_mutex_postunlock();
+ */
+ preempt_disable();
wake_q_add(wake_q, waiter->task);
+ raw_spin_unlock(&current->pi_lock);
}
/*
@@ -1080,6 +1084,8 @@ static void remove_waiter(struct rt_mutex *lock,
struct task_struct *owner = rt_mutex_owner(lock);
struct rt_mutex *next_lock;
+ lockdep_assert_held(&lock->wait_lock);
+
raw_spin_lock(&current->pi_lock);
rt_mutex_dequeue(lock, waiter);
current->pi_blocked_on = NULL;
@@ -1099,7 +1105,7 @@ static void remove_waiter(struct rt_mutex *lock,
if (rt_mutex_has_waiters(lock))
rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
- __rt_mutex_adjust_prio(owner);
+ rt_mutex_adjust_prio(owner);
/* Store the lock on which owner is blocked or NULL */
next_lock = task_blocked_on_lock(owner);
@@ -1138,8 +1144,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
- if (!waiter || (waiter->prio == task->prio &&
- !dl_prio(task->prio))) {
+ if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return;
}
@@ -1153,6 +1158,14 @@ void rt_mutex_adjust_pi(struct task_struct *task)
next_lock, NULL, task);
}
+void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
+{
+ debug_rt_mutex_init_waiter(waiter);
+ RB_CLEAR_NODE(&waiter->pi_tree_entry);
+ RB_CLEAR_NODE(&waiter->tree_entry);
+ waiter->task = NULL;
+}
+
/**
* __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
* @lock: the rt_mutex to take
@@ -1179,7 +1192,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
* TASK_INTERRUPTIBLE checks for signals and
* timeout. Ignored otherwise.
*/
- if (unlikely(state == TASK_INTERRUPTIBLE)) {
+ if (likely(state == TASK_INTERRUPTIBLE)) {
/* Signal pending? */
if (signal_pending(current))
ret = -EINTR;
@@ -1235,9 +1248,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
unsigned long flags;
int ret = 0;
- debug_rt_mutex_init_waiter(&waiter);
- RB_CLEAR_NODE(&waiter.pi_tree_entry);
- RB_CLEAR_NODE(&waiter.tree_entry);
+ rt_mutex_init_waiter(&waiter);
/*
* Technically we could use raw_spin_[un]lock_irq() here, but this can
@@ -1328,7 +1339,8 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
/*
* Slow path to release a rt-mutex.
- * Return whether the current task needs to undo a potential priority boosting.
+ *
+ * Return whether the current task needs to call rt_mutex_postunlock().
*/
static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
struct wake_q_head *wake_q)
@@ -1340,8 +1352,6 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
debug_rt_mutex_unlock(lock);
- rt_mutex_deadlock_account_unlock(current);
-
/*
* We must be careful here if the fast path is enabled. If we
* have no waiters queued we cannot set owner to NULL here
@@ -1388,11 +1398,9 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
* Queue the next waiter for wakeup once we release the wait_lock.
*/
mark_wakeup_next_waiter(wake_q, lock);
-
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
- /* check PI boosting */
- return true;
+ return true; /* call rt_mutex_postunlock() */
}
/*
@@ -1407,11 +1415,10 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
enum rtmutex_chainwalk chwalk))
{
- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
- rt_mutex_deadlock_account_lock(lock, current);
+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
return 0;
- } else
- return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
+
+ return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK);
}
static inline int
@@ -1423,24 +1430,33 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
enum rtmutex_chainwalk chwalk))
{
if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
- likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
- rt_mutex_deadlock_account_lock(lock, current);
+ likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
return 0;
- } else
- return slowfn(lock, state, timeout, chwalk);
+
+ return slowfn(lock, state, timeout, chwalk);
}
static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
int (*slowfn)(struct rt_mutex *lock))
{
- if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
- rt_mutex_deadlock_account_lock(lock, current);
+ if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
return 1;
- }
+
return slowfn(lock);
}
+/*
+ * Performs the wakeup of the the top-waiter and re-enables preemption.
+ */
+void rt_mutex_postunlock(struct wake_q_head *wake_q)
+{
+ wake_up_q(wake_q);
+
+ /* Pairs with preempt_disable() in rt_mutex_slowunlock() */
+ preempt_enable();
+}
+
static inline void
rt_mutex_fastunlock(struct rt_mutex *lock,
bool (*slowfn)(struct rt_mutex *lock,
@@ -1448,18 +1464,11 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
{
DEFINE_WAKE_Q(wake_q);
- if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
- rt_mutex_deadlock_account_unlock(current);
-
- } else {
- bool deboost = slowfn(lock, &wake_q);
-
- wake_up_q(&wake_q);
+ if (likely(rt_mutex_cmpxchg_release(lock, current, NULL)))
+ return;
- /* Undo pi boosting if necessary: */
- if (deboost)
- rt_mutex_adjust_prio(current);
- }
+ if (slowfn(lock, &wake_q))
+ rt_mutex_postunlock(&wake_q);
}
/**
@@ -1471,6 +1480,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock)
{
might_sleep();
+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, rt_mutex_slowlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_lock);
@@ -1486,23 +1496,25 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock);
*/
int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock)
{
+ int ret;
+
might_sleep();
- return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+ ret = rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, rt_mutex_slowlock);
+ if (ret)
+ mutex_release(&lock->dep_map, 1, _RET_IP_);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
/*
- * Futex variant with full deadlock detection.
+ * Futex variant, must not use fastpath.
*/
-int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
- struct hrtimer_sleeper *timeout)
+int __sched rt_mutex_futex_trylock(struct rt_mutex *lock)
{
- might_sleep();
-
- return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
- RT_MUTEX_FULL_CHAINWALK,
- rt_mutex_slowlock);
+ return rt_mutex_slowtrylock(lock);
}
/**
@@ -1521,11 +1533,18 @@ int rt_mutex_timed_futex_lock(struct rt_mutex *lock,
int
rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout)
{
+ int ret;
+
might_sleep();
- return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
+ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+ ret = rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout,
RT_MUTEX_MIN_CHAINWALK,
rt_mutex_slowlock);
+ if (ret)
+ mutex_release(&lock->dep_map, 1, _RET_IP_);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
@@ -1542,10 +1561,16 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
*/
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
+ int ret;
+
if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
return 0;
- return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
+ ret = rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
+ if (ret)
+ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(rt_mutex_trylock);
@@ -1556,25 +1581,49 @@ EXPORT_SYMBOL_GPL(rt_mutex_trylock);
*/
void __sched rt_mutex_unlock(struct rt_mutex *lock)
{
+ mutex_release(&lock->dep_map, 1, _RET_IP_);
rt_mutex_fastunlock(lock, rt_mutex_slowunlock);
}
EXPORT_SYMBOL_GPL(rt_mutex_unlock);
/**
- * rt_mutex_futex_unlock - Futex variant of rt_mutex_unlock
- * @lock: the rt_mutex to be unlocked
- *
- * Returns: true/false indicating whether priority adjustment is
- * required or not.
+ * Futex variant, that since futex variants do not use the fast-path, can be
+ * simple and will not need to retry.
*/
-bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
- struct wake_q_head *wqh)
+bool __sched __rt_mutex_futex_unlock(struct rt_mutex *lock,
+ struct wake_q_head *wake_q)
{
- if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
- rt_mutex_deadlock_account_unlock(current);
- return false;
+ lockdep_assert_held(&lock->wait_lock);
+
+ debug_rt_mutex_unlock(lock);
+
+ if (!rt_mutex_has_waiters(lock)) {
+ lock->owner = NULL;
+ return false; /* done */
}
- return rt_mutex_slowunlock(lock, wqh);
+
+ /*
+ * We've already deboosted, mark_wakeup_next_waiter() will
+ * retain preempt_disabled when we drop the wait_lock, to
+ * avoid inversion prior to the wakeup. preempt_disable()
+ * therein pairs with rt_mutex_postunlock().
+ */
+ mark_wakeup_next_waiter(wake_q, lock);
+
+ return true; /* call postunlock() */
+}
+
+void __sched rt_mutex_futex_unlock(struct rt_mutex *lock)
+{
+ DEFINE_WAKE_Q(wake_q);
+ bool postunlock;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+ postunlock = __rt_mutex_futex_unlock(lock, &wake_q);
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ if (postunlock)
+ rt_mutex_postunlock(&wake_q);
}
/**
@@ -1592,7 +1641,6 @@ void rt_mutex_destroy(struct rt_mutex *lock)
lock->magic = NULL;
#endif
}
-
EXPORT_SYMBOL_GPL(rt_mutex_destroy);
/**
@@ -1604,14 +1652,16 @@ EXPORT_SYMBOL_GPL(rt_mutex_destroy);
*
* Initializing of a locked rt lock is not allowed
*/
-void __rt_mutex_init(struct rt_mutex *lock, const char *name)
+void __rt_mutex_init(struct rt_mutex *lock, const char *name,
+ struct lock_class_key *key)
{
lock->owner = NULL;
raw_spin_lock_init(&lock->wait_lock);
lock->waiters = RB_ROOT;
lock->waiters_leftmost = NULL;
- debug_rt_mutex_init(lock, name);
+ if (name && key)
+ debug_rt_mutex_init(lock, name, key);
}
EXPORT_SYMBOL_GPL(__rt_mutex_init);
@@ -1632,10 +1682,9 @@ EXPORT_SYMBOL_GPL(__rt_mutex_init);
void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner)
{
- __rt_mutex_init(lock, NULL);
+ __rt_mutex_init(lock, NULL, NULL);
debug_rt_mutex_proxy_lock(lock, proxy_owner);
rt_mutex_set_owner(lock, proxy_owner);
- rt_mutex_deadlock_account_lock(lock, proxy_owner);
}
/**
@@ -1655,34 +1704,16 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
{
debug_rt_mutex_proxy_unlock(lock);
rt_mutex_set_owner(lock, NULL);
- rt_mutex_deadlock_account_unlock(proxy_owner);
}
-/**
- * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
- * @lock: the rt_mutex to take
- * @waiter: the pre-initialized rt_mutex_waiter
- * @task: the task to prepare
- *
- * Returns:
- * 0 - task blocked on lock
- * 1 - acquired the lock for task, caller should wake it up
- * <0 - error
- *
- * Special API call for FUTEX_REQUEUE_PI support.
- */
-int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task)
{
int ret;
- raw_spin_lock_irq(&lock->wait_lock);
-
- if (try_to_take_rt_mutex(lock, task, NULL)) {
- raw_spin_unlock_irq(&lock->wait_lock);
+ if (try_to_take_rt_mutex(lock, task, NULL))
return 1;
- }
/* We enforce deadlock detection for futexes */
ret = task_blocks_on_rt_mutex(lock, waiter, task,
@@ -1701,14 +1732,38 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
if (unlikely(ret))
remove_waiter(lock, waiter);
- raw_spin_unlock_irq(&lock->wait_lock);
-
debug_rt_mutex_print_deadlock(waiter);
return ret;
}
/**
+ * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
+ * @lock: the rt_mutex to take
+ * @waiter: the pre-initialized rt_mutex_waiter
+ * @task: the task to prepare
+ *
+ * Returns:
+ * 0 - task blocked on lock
+ * 1 - acquired the lock for task, caller should wake it up
+ * <0 - error
+ *
+ * Special API call for FUTEX_REQUEUE_PI support.
+ */
+int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *task)
+{
+ int ret;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+ ret = __rt_mutex_start_proxy_lock(lock, waiter, task);
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return ret;
+}
+
+/**
* rt_mutex_next_owner - return the next owner of the lock
*
* @lock: the rt lock query
@@ -1729,36 +1784,87 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
}
/**
- * rt_mutex_finish_proxy_lock() - Complete lock acquisition
+ * rt_mutex_wait_proxy_lock() - Wait for lock acquisition
* @lock: the rt_mutex we were woken on
* @to: the timeout, null if none. hrtimer should already have
* been started.
* @waiter: the pre-initialized rt_mutex_waiter
*
- * Complete the lock acquisition started our behalf by another thread.
+ * Wait for the the lock acquisition started on our behalf by
+ * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
+ * rt_mutex_cleanup_proxy_lock().
*
* Returns:
* 0 - success
* <0 - error, one of -EINTR, -ETIMEDOUT
*
- * Special API call for PI-futex requeue support
+ * Special API call for PI-futex support
*/
-int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
+int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *to,
struct rt_mutex_waiter *waiter)
{
int ret;
raw_spin_lock_irq(&lock->wait_lock);
-
- set_current_state(TASK_INTERRUPTIBLE);
-
/* sleep on the mutex */
+ set_current_state(TASK_INTERRUPTIBLE);
ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
+ /*
+ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+ * have to fix that up.
+ */
+ fixup_rt_mutex_waiters(lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
- if (unlikely(ret))
- remove_waiter(lock, waiter);
+ return ret;
+}
+/**
+ * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
+ * @lock: the rt_mutex we were woken on
+ * @waiter: the pre-initialized rt_mutex_waiter
+ *
+ * Attempt to clean up after a failed rt_mutex_wait_proxy_lock().
+ *
+ * Unless we acquired the lock; we're still enqueued on the wait-list and can
+ * in fact still be granted ownership until we're removed. Therefore we can
+ * find we are in fact the owner and must disregard the
+ * rt_mutex_wait_proxy_lock() failure.
+ *
+ * Returns:
+ * true - did the cleanup, we done.
+ * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
+ * caller should disregards its return value.
+ *
+ * Special API call for PI-futex support
+ */
+bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter)
+{
+ bool cleanup = false;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+ /*
+ * Do an unconditional try-lock, this deals with the lock stealing
+ * state where __rt_mutex_futex_unlock() -> mark_wakeup_next_waiter()
+ * sets a NULL owner.
+ *
+ * We're not interested in the return value, because the subsequent
+ * test on rt_mutex_owner() will infer that. If the trylock succeeded,
+ * we will own the lock and it will have removed the waiter. If we
+ * failed the trylock, we're still not owner and we need to remove
+ * ourselves.
+ */
+ try_to_take_rt_mutex(lock, current, waiter);
+ /*
+ * Unless we're the owner; we're still enqueued on the wait_list.
+ * So check if we became owner, if not, take us off the wait_list.
+ */
+ if (rt_mutex_owner(lock) != current) {
+ remove_waiter(lock, waiter);
+ cleanup = true;
+ }
/*
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
* have to fix that up.
@@ -1767,5 +1873,5 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
raw_spin_unlock_irq(&lock->wait_lock);
- return ret;
+ return cleanup;
}
diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h
index c4060584c407..5c253caffe91 100644
--- a/kernel/locking/rtmutex.h
+++ b/kernel/locking/rtmutex.h
@@ -11,15 +11,13 @@
*/
#define rt_mutex_deadlock_check(l) (0)
-#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
-#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
#define debug_rt_mutex_init_waiter(w) do { } while (0)
#define debug_rt_mutex_free_waiter(w) do { } while (0)
#define debug_rt_mutex_lock(l) do { } while (0)
#define debug_rt_mutex_proxy_lock(l,p) do { } while (0)
#define debug_rt_mutex_proxy_unlock(l) do { } while (0)
#define debug_rt_mutex_unlock(l) do { } while (0)
-#define debug_rt_mutex_init(m, n) do { } while (0)
+#define debug_rt_mutex_init(m, n, k) do { } while (0)
#define debug_rt_mutex_deadlock(d, a ,l) do { } while (0)
#define debug_rt_mutex_print_deadlock(w) do { } while (0)
#define debug_rt_mutex_reset_waiter(w) do { } while (0)
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index 990134617b4c..72ad45a9a794 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -13,6 +13,7 @@
#define __KERNEL_RTMUTEX_COMMON_H
#include <linux/rtmutex.h>
+#include <linux/sched/wake_q.h>
/*
* This is the control structure for tasks blocked on a rt_mutex,
@@ -33,6 +34,7 @@ struct rt_mutex_waiter {
struct rt_mutex *deadlock_lock;
#endif
int prio;
+ u64 deadline;
};
/*
@@ -102,16 +104,26 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner);
+extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
+extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter,
+ struct task_struct *task);
extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task);
-extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
- struct hrtimer_sleeper *to,
- struct rt_mutex_waiter *waiter);
-extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
-extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
- struct wake_q_head *wqh);
-extern void rt_mutex_adjust_prio(struct task_struct *task);
+extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
+ struct hrtimer_sleeper *to,
+ struct rt_mutex_waiter *waiter);
+extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter);
+
+extern int rt_mutex_futex_trylock(struct rt_mutex *l);
+
+extern void rt_mutex_futex_unlock(struct rt_mutex *lock);
+extern bool __rt_mutex_futex_unlock(struct rt_mutex *lock,
+ struct wake_q_head *wqh);
+
+extern void rt_mutex_postunlock(struct wake_q_head *wake_q);
#ifdef CONFIG_DEBUG_RT_MUTEXES
# include "rtmutex-debug.h"
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 1591f6b3539f..20819df98125 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -6,7 +6,8 @@
* - Derived also from comments by Linus
*/
#include <linux/rwsem.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/debug.h>
#include <linux/export.h>
enum rwsem_waiter_type {
@@ -128,7 +129,6 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem)
void __sched __down_read(struct rw_semaphore *sem)
{
struct rwsem_waiter waiter;
- struct task_struct *tsk;
unsigned long flags;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
@@ -140,13 +140,12 @@ void __sched __down_read(struct rw_semaphore *sem)
goto out;
}
- tsk = current;
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
/* set up my own style of waitqueue */
- waiter.task = tsk;
+ waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
- get_task_struct(tsk);
+ get_task_struct(current);
list_add_tail(&waiter.list, &sem->wait_list);
@@ -158,10 +157,10 @@ void __sched __down_read(struct rw_semaphore *sem)
if (!waiter.task)
break;
schedule();
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
}
- __set_task_state(tsk, TASK_RUNNING);
+ __set_current_state(TASK_RUNNING);
out:
;
}
@@ -194,15 +193,13 @@ int __down_read_trylock(struct rw_semaphore *sem)
int __sched __down_write_common(struct rw_semaphore *sem, int state)
{
struct rwsem_waiter waiter;
- struct task_struct *tsk;
unsigned long flags;
int ret = 0;
raw_spin_lock_irqsave(&sem->wait_lock, flags);
/* set up my own style of waitqueue */
- tsk = current;
- waiter.task = tsk;
+ waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_WRITE;
list_add_tail(&waiter.list, &sem->wait_list);
@@ -216,23 +213,29 @@ int __sched __down_write_common(struct rw_semaphore *sem, int state)
*/
if (sem->count == 0)
break;
- if (signal_pending_state(state, current)) {
- ret = -EINTR;
- goto out;
- }
- set_task_state(tsk, state);
+ if (signal_pending_state(state, current))
+ goto out_nolock;
+
+ set_current_state(state);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
schedule();
raw_spin_lock_irqsave(&sem->wait_lock, flags);
}
/* got the lock */
sem->count = -1;
-out:
list_del(&waiter.list);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
return ret;
+
+out_nolock:
+ list_del(&waiter.list);
+ if (!list_empty(&sem->wait_list) && sem->count >= 0)
+ __rwsem_do_wake(sem, 0);
+ raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+
+ return -EINTR;
}
void __sched __down_write(struct rw_semaphore *sem)
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 631506004f9e..34e727f18e49 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -10,10 +10,12 @@
* and Davidlohr Bueso <[email protected]>. Based on mutexes.
*/
#include <linux/rwsem.h>
-#include <linux/sched.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/sched/signal.h>
#include <linux/sched/rt.h>
+#include <linux/sched/wake_q.h>
+#include <linux/sched/debug.h>
#include <linux/osq_lock.h>
#include "rwsem.h"
@@ -224,10 +226,9 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
{
long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
struct rwsem_waiter waiter;
- struct task_struct *tsk = current;
DEFINE_WAKE_Q(wake_q);
- waiter.task = tsk;
+ waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
raw_spin_lock_irq(&sem->wait_lock);
@@ -254,13 +255,13 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
/* wait to be given the lock */
while (true) {
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
if (!waiter.task)
break;
schedule();
}
- __set_task_state(tsk, TASK_RUNNING);
+ __set_current_state(TASK_RUNNING);
return sem;
}
EXPORT_SYMBOL(rwsem_down_read_failed);
@@ -503,8 +504,6 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
* wake any read locks that were queued ahead of us.
*/
if (count > RWSEM_WAITING_BIAS) {
- DEFINE_WAKE_Q(wake_q);
-
__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
/*
* The wakeup is normally called _after_ the wait_lock
@@ -514,6 +513,11 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
* for attempting rwsem_try_write_lock().
*/
wake_up_q(&wake_q);
+
+ /*
+ * Reinitialize wake_q after use.
+ */
+ wake_q_init(&wake_q);
}
} else
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 45ba475d4be3..4d48b1c4870d 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -7,6 +7,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/debug.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
@@ -123,10 +124,8 @@ EXPORT_SYMBOL(up_write);
*/
void downgrade_write(struct rw_semaphore *sem)
{
- /*
- * lockdep: a downgraded write will live on as a write
- * dependency.
- */
+ lock_downgrade(&sem->dep_map, _RET_IP_);
+
rwsem_set_reader_owned(sem);
__downgrade_write(sem);
}
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index b8120abe594b..561acdd39960 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -29,6 +29,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/sched.h>
+#include <linux/sched/debug.h>
#include <linux/semaphore.h>
#include <linux/spinlock.h>
#include <linux/ftrace.h>
@@ -204,19 +205,18 @@ struct semaphore_waiter {
static inline int __sched __down_common(struct semaphore *sem, long state,
long timeout)
{
- struct task_struct *task = current;
struct semaphore_waiter waiter;
list_add_tail(&waiter.list, &sem->wait_list);
- waiter.task = task;
+ waiter.task = current;
waiter.up = false;
for (;;) {
- if (signal_pending_state(state, task))
+ if (signal_pending_state(state, current))
goto interrupted;
if (unlikely(timeout <= 0))
goto timed_out;
- __set_task_state(task, state);
+ __set_current_state(state);
raw_spin_unlock_irq(&sem->lock);
timeout = schedule_timeout(timeout);
raw_spin_lock_irq(&sem->lock);
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index db3ccb1dd614..4b082b5cac9e 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -363,14 +363,6 @@ void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass)
}
EXPORT_SYMBOL(_raw_spin_lock_nested);
-void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass)
-{
- __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET);
- spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
- LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
-}
-EXPORT_SYMBOL(_raw_spin_lock_bh_nested);
-
unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
int subclass)
{
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 0374a596cffa..9aa0fccd5d43 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -103,38 +103,14 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock)
lock->owner_cpu = -1;
}
-static void __spin_lock_debug(raw_spinlock_t *lock)
-{
- u64 i;
- u64 loops = loops_per_jiffy * HZ;
-
- for (i = 0; i < loops; i++) {
- if (arch_spin_trylock(&lock->raw_lock))
- return;
- __delay(1);
- }
- /* lockup suspected: */
- spin_dump(lock, "lockup suspected");
-#ifdef CONFIG_SMP
- trigger_all_cpu_backtrace();
-#endif
-
- /*
- * The trylock above was causing a livelock. Give the lower level arch
- * specific lock code a chance to acquire the lock. We have already
- * printed a warning/backtrace at this point. The non-debug arch
- * specific code might actually succeed in acquiring the lock. If it is
- * not successful, the end-result is the same - there is no forward
- * progress.
- */
- arch_spin_lock(&lock->raw_lock);
-}
-
+/*
+ * We are now relying on the NMI watchdog to detect lockup instead of doing
+ * the detection here with an unfair lock which can cause problem of its own.
+ */
void do_raw_spin_lock(raw_spinlock_t *lock)
{
debug_spin_lock_before(lock);
- if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
- __spin_lock_debug(lock);
+ arch_spin_lock(&lock->raw_lock);
debug_spin_lock_after(lock);
}
@@ -172,32 +148,6 @@ static void rwlock_bug(rwlock_t *lock, const char *msg)
#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg)
-#if 0 /* __write_lock_debug() can lock up - maybe this can too? */
-static void __read_lock_debug(rwlock_t *lock)
-{
- u64 i;
- u64 loops = loops_per_jiffy * HZ;
- int print_once = 1;
-
- for (;;) {
- for (i = 0; i < loops; i++) {
- if (arch_read_trylock(&lock->raw_lock))
- return;
- __delay(1);
- }
- /* lockup suspected: */
- if (print_once) {
- print_once = 0;
- printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, "
- "%s/%d, %p\n",
- raw_smp_processor_id(), current->comm,
- current->pid, lock);
- dump_stack();
- }
- }
-}
-#endif
-
void do_raw_read_lock(rwlock_t *lock)
{
RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
@@ -247,32 +197,6 @@ static inline void debug_write_unlock(rwlock_t *lock)
lock->owner_cpu = -1;
}
-#if 0 /* This can cause lockups */
-static void __write_lock_debug(rwlock_t *lock)
-{
- u64 i;
- u64 loops = loops_per_jiffy * HZ;
- int print_once = 1;
-
- for (;;) {
- for (i = 0; i < loops; i++) {
- if (arch_write_trylock(&lock->raw_lock))
- return;
- __delay(1);
- }
- /* lockup suspected: */
- if (print_once) {
- print_once = 0;
- printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, "
- "%s/%d, %p\n",
- raw_smp_processor_id(), current->comm,
- current->pid, lock);
- dump_stack();
- }
- }
-}
-#endif
-
void do_raw_write_lock(rwlock_t *lock)
{
debug_write_lock_before(lock);
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
new file mode 100644
index 000000000000..39f56c870051
--- /dev/null
+++ b/kernel/locking/test-ww_mutex.c
@@ -0,0 +1,645 @@
+/*
+ * Module-based API test facility for ww_mutexes
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ */
+
+#include <linux/kernel.h>
+
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ww_mutex.h>
+
+static DEFINE_WW_CLASS(ww_class);
+struct workqueue_struct *wq;
+
+struct test_mutex {
+ struct work_struct work;
+ struct ww_mutex mutex;
+ struct completion ready, go, done;
+ unsigned int flags;
+};
+
+#define TEST_MTX_SPIN BIT(0)
+#define TEST_MTX_TRY BIT(1)
+#define TEST_MTX_CTX BIT(2)
+#define __TEST_MTX_LAST BIT(3)
+
+static void test_mutex_work(struct work_struct *work)
+{
+ struct test_mutex *mtx = container_of(work, typeof(*mtx), work);
+
+ complete(&mtx->ready);
+ wait_for_completion(&mtx->go);
+
+ if (mtx->flags & TEST_MTX_TRY) {
+ while (!ww_mutex_trylock(&mtx->mutex))
+ cond_resched();
+ } else {
+ ww_mutex_lock(&mtx->mutex, NULL);
+ }
+ complete(&mtx->done);
+ ww_mutex_unlock(&mtx->mutex);
+}
+
+static int __test_mutex(unsigned int flags)
+{
+#define TIMEOUT (HZ / 16)
+ struct test_mutex mtx;
+ struct ww_acquire_ctx ctx;
+ int ret;
+
+ ww_mutex_init(&mtx.mutex, &ww_class);
+ ww_acquire_init(&ctx, &ww_class);
+
+ INIT_WORK_ONSTACK(&mtx.work, test_mutex_work);
+ init_completion(&mtx.ready);
+ init_completion(&mtx.go);
+ init_completion(&mtx.done);
+ mtx.flags = flags;
+
+ schedule_work(&mtx.work);
+
+ wait_for_completion(&mtx.ready);
+ ww_mutex_lock(&mtx.mutex, (flags & TEST_MTX_CTX) ? &ctx : NULL);
+ complete(&mtx.go);
+ if (flags & TEST_MTX_SPIN) {
+ unsigned long timeout = jiffies + TIMEOUT;
+
+ ret = 0;
+ do {
+ if (completion_done(&mtx.done)) {
+ ret = -EINVAL;
+ break;
+ }
+ cond_resched();
+ } while (time_before(jiffies, timeout));
+ } else {
+ ret = wait_for_completion_timeout(&mtx.done, TIMEOUT);
+ }
+ ww_mutex_unlock(&mtx.mutex);
+ ww_acquire_fini(&ctx);
+
+ if (ret) {
+ pr_err("%s(flags=%x): mutual exclusion failure\n",
+ __func__, flags);
+ ret = -EINVAL;
+ }
+
+ flush_work(&mtx.work);
+ destroy_work_on_stack(&mtx.work);
+ return ret;
+#undef TIMEOUT
+}
+
+static int test_mutex(void)
+{
+ int ret;
+ int i;
+
+ for (i = 0; i < __TEST_MTX_LAST; i++) {
+ ret = __test_mutex(i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int test_aa(void)
+{
+ struct ww_mutex mutex;
+ struct ww_acquire_ctx ctx;
+ int ret;
+
+ ww_mutex_init(&mutex, &ww_class);
+ ww_acquire_init(&ctx, &ww_class);
+
+ ww_mutex_lock(&mutex, &ctx);
+
+ if (ww_mutex_trylock(&mutex)) {
+ pr_err("%s: trylocked itself!\n", __func__);
+ ww_mutex_unlock(&mutex);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = ww_mutex_lock(&mutex, &ctx);
+ if (ret != -EALREADY) {
+ pr_err("%s: missed deadlock for recursing, ret=%d\n",
+ __func__, ret);
+ if (!ret)
+ ww_mutex_unlock(&mutex);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ ww_mutex_unlock(&mutex);
+ ww_acquire_fini(&ctx);
+ return ret;
+}
+
+struct test_abba {
+ struct work_struct work;
+ struct ww_mutex a_mutex;
+ struct ww_mutex b_mutex;
+ struct completion a_ready;
+ struct completion b_ready;
+ bool resolve;
+ int result;
+};
+
+static void test_abba_work(struct work_struct *work)
+{
+ struct test_abba *abba = container_of(work, typeof(*abba), work);
+ struct ww_acquire_ctx ctx;
+ int err;
+
+ ww_acquire_init(&ctx, &ww_class);
+ ww_mutex_lock(&abba->b_mutex, &ctx);
+
+ complete(&abba->b_ready);
+ wait_for_completion(&abba->a_ready);
+
+ err = ww_mutex_lock(&abba->a_mutex, &ctx);
+ if (abba->resolve && err == -EDEADLK) {
+ ww_mutex_unlock(&abba->b_mutex);
+ ww_mutex_lock_slow(&abba->a_mutex, &ctx);
+ err = ww_mutex_lock(&abba->b_mutex, &ctx);
+ }
+
+ if (!err)
+ ww_mutex_unlock(&abba->a_mutex);
+ ww_mutex_unlock(&abba->b_mutex);
+ ww_acquire_fini(&ctx);
+
+ abba->result = err;
+}
+
+static int test_abba(bool resolve)
+{
+ struct test_abba abba;
+ struct ww_acquire_ctx ctx;
+ int err, ret;
+
+ ww_mutex_init(&abba.a_mutex, &ww_class);
+ ww_mutex_init(&abba.b_mutex, &ww_class);
+ INIT_WORK_ONSTACK(&abba.work, test_abba_work);
+ init_completion(&abba.a_ready);
+ init_completion(&abba.b_ready);
+ abba.resolve = resolve;
+
+ schedule_work(&abba.work);
+
+ ww_acquire_init(&ctx, &ww_class);
+ ww_mutex_lock(&abba.a_mutex, &ctx);
+
+ complete(&abba.a_ready);
+ wait_for_completion(&abba.b_ready);
+
+ err = ww_mutex_lock(&abba.b_mutex, &ctx);
+ if (resolve && err == -EDEADLK) {
+ ww_mutex_unlock(&abba.a_mutex);
+ ww_mutex_lock_slow(&abba.b_mutex, &ctx);
+ err = ww_mutex_lock(&abba.a_mutex, &ctx);
+ }
+
+ if (!err)
+ ww_mutex_unlock(&abba.b_mutex);
+ ww_mutex_unlock(&abba.a_mutex);
+ ww_acquire_fini(&ctx);
+
+ flush_work(&abba.work);
+ destroy_work_on_stack(&abba.work);
+
+ ret = 0;
+ if (resolve) {
+ if (err || abba.result) {
+ pr_err("%s: failed to resolve ABBA deadlock, A err=%d, B err=%d\n",
+ __func__, err, abba.result);
+ ret = -EINVAL;
+ }
+ } else {
+ if (err != -EDEADLK && abba.result != -EDEADLK) {
+ pr_err("%s: missed ABBA deadlock, A err=%d, B err=%d\n",
+ __func__, err, abba.result);
+ ret = -EINVAL;
+ }
+ }
+ return ret;
+}
+
+struct test_cycle {
+ struct work_struct work;
+ struct ww_mutex a_mutex;
+ struct ww_mutex *b_mutex;
+ struct completion *a_signal;
+ struct completion b_signal;
+ int result;
+};
+
+static void test_cycle_work(struct work_struct *work)
+{
+ struct test_cycle *cycle = container_of(work, typeof(*cycle), work);
+ struct ww_acquire_ctx ctx;
+ int err;
+
+ ww_acquire_init(&ctx, &ww_class);
+ ww_mutex_lock(&cycle->a_mutex, &ctx);
+
+ complete(cycle->a_signal);
+ wait_for_completion(&cycle->b_signal);
+
+ err = ww_mutex_lock(cycle->b_mutex, &ctx);
+ if (err == -EDEADLK) {
+ ww_mutex_unlock(&cycle->a_mutex);
+ ww_mutex_lock_slow(cycle->b_mutex, &ctx);
+ err = ww_mutex_lock(&cycle->a_mutex, &ctx);
+ }
+
+ if (!err)
+ ww_mutex_unlock(cycle->b_mutex);
+ ww_mutex_unlock(&cycle->a_mutex);
+ ww_acquire_fini(&ctx);
+
+ cycle->result = err;
+}
+
+static int __test_cycle(unsigned int nthreads)
+{
+ struct test_cycle *cycles;
+ unsigned int n, last = nthreads - 1;
+ int ret;
+
+ cycles = kmalloc_array(nthreads, sizeof(*cycles), GFP_KERNEL);
+ if (!cycles)
+ return -ENOMEM;
+
+ for (n = 0; n < nthreads; n++) {
+ struct test_cycle *cycle = &cycles[n];
+
+ ww_mutex_init(&cycle->a_mutex, &ww_class);
+ if (n == last)
+ cycle->b_mutex = &cycles[0].a_mutex;
+ else
+ cycle->b_mutex = &cycles[n + 1].a_mutex;
+
+ if (n == 0)
+ cycle->a_signal = &cycles[last].b_signal;
+ else
+ cycle->a_signal = &cycles[n - 1].b_signal;
+ init_completion(&cycle->b_signal);
+
+ INIT_WORK(&cycle->work, test_cycle_work);
+ cycle->result = 0;
+ }
+
+ for (n = 0; n < nthreads; n++)
+ queue_work(wq, &cycles[n].work);
+
+ flush_workqueue(wq);
+
+ ret = 0;
+ for (n = 0; n < nthreads; n++) {
+ struct test_cycle *cycle = &cycles[n];
+
+ if (!cycle->result)
+ continue;
+
+ pr_err("cylic deadlock not resolved, ret[%d/%d] = %d\n",
+ n, nthreads, cycle->result);
+ ret = -EINVAL;
+ break;
+ }
+
+ for (n = 0; n < nthreads; n++)
+ ww_mutex_destroy(&cycles[n].a_mutex);
+ kfree(cycles);
+ return ret;
+}
+
+static int test_cycle(unsigned int ncpus)
+{
+ unsigned int n;
+ int ret;
+
+ for (n = 2; n <= ncpus + 1; n++) {
+ ret = __test_cycle(n);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+struct stress {
+ struct work_struct work;
+ struct ww_mutex *locks;
+ unsigned long timeout;
+ int nlocks;
+};
+
+static int *get_random_order(int count)
+{
+ int *order;
+ int n, r, tmp;
+
+ order = kmalloc_array(count, sizeof(*order), GFP_TEMPORARY);
+ if (!order)
+ return order;
+
+ for (n = 0; n < count; n++)
+ order[n] = n;
+
+ for (n = count - 1; n > 1; n--) {
+ r = get_random_int() % (n + 1);
+ if (r != n) {
+ tmp = order[n];
+ order[n] = order[r];
+ order[r] = tmp;
+ }
+ }
+
+ return order;
+}
+
+static void dummy_load(struct stress *stress)
+{
+ usleep_range(1000, 2000);
+}
+
+static void stress_inorder_work(struct work_struct *work)
+{
+ struct stress *stress = container_of(work, typeof(*stress), work);
+ const int nlocks = stress->nlocks;
+ struct ww_mutex *locks = stress->locks;
+ struct ww_acquire_ctx ctx;
+ int *order;
+
+ order = get_random_order(nlocks);
+ if (!order)
+ return;
+
+ do {
+ int contended = -1;
+ int n, err;
+
+ ww_acquire_init(&ctx, &ww_class);
+retry:
+ err = 0;
+ for (n = 0; n < nlocks; n++) {
+ if (n == contended)
+ continue;
+
+ err = ww_mutex_lock(&locks[order[n]], &ctx);
+ if (err < 0)
+ break;
+ }
+ if (!err)
+ dummy_load(stress);
+
+ if (contended > n)
+ ww_mutex_unlock(&locks[order[contended]]);
+ contended = n;
+ while (n--)
+ ww_mutex_unlock(&locks[order[n]]);
+
+ if (err == -EDEADLK) {
+ ww_mutex_lock_slow(&locks[order[contended]], &ctx);
+ goto retry;
+ }
+
+ if (err) {
+ pr_err_once("stress (%s) failed with %d\n",
+ __func__, err);
+ break;
+ }
+
+ ww_acquire_fini(&ctx);
+ } while (!time_after(jiffies, stress->timeout));
+
+ kfree(order);
+ kfree(stress);
+}
+
+struct reorder_lock {
+ struct list_head link;
+ struct ww_mutex *lock;
+};
+
+static void stress_reorder_work(struct work_struct *work)
+{
+ struct stress *stress = container_of(work, typeof(*stress), work);
+ LIST_HEAD(locks);
+ struct ww_acquire_ctx ctx;
+ struct reorder_lock *ll, *ln;
+ int *order;
+ int n, err;
+
+ order = get_random_order(stress->nlocks);
+ if (!order)
+ return;
+
+ for (n = 0; n < stress->nlocks; n++) {
+ ll = kmalloc(sizeof(*ll), GFP_KERNEL);
+ if (!ll)
+ goto out;
+
+ ll->lock = &stress->locks[order[n]];
+ list_add(&ll->link, &locks);
+ }
+ kfree(order);
+ order = NULL;
+
+ do {
+ ww_acquire_init(&ctx, &ww_class);
+
+ list_for_each_entry(ll, &locks, link) {
+ err = ww_mutex_lock(ll->lock, &ctx);
+ if (!err)
+ continue;
+
+ ln = ll;
+ list_for_each_entry_continue_reverse(ln, &locks, link)
+ ww_mutex_unlock(ln->lock);
+
+ if (err != -EDEADLK) {
+ pr_err_once("stress (%s) failed with %d\n",
+ __func__, err);
+ break;
+ }
+
+ ww_mutex_lock_slow(ll->lock, &ctx);
+ list_move(&ll->link, &locks); /* restarts iteration */
+ }
+
+ dummy_load(stress);
+ list_for_each_entry(ll, &locks, link)
+ ww_mutex_unlock(ll->lock);
+
+ ww_acquire_fini(&ctx);
+ } while (!time_after(jiffies, stress->timeout));
+
+out:
+ list_for_each_entry_safe(ll, ln, &locks, link)
+ kfree(ll);
+ kfree(order);
+ kfree(stress);
+}
+
+static void stress_one_work(struct work_struct *work)
+{
+ struct stress *stress = container_of(work, typeof(*stress), work);
+ const int nlocks = stress->nlocks;
+ struct ww_mutex *lock = stress->locks + (get_random_int() % nlocks);
+ int err;
+
+ do {
+ err = ww_mutex_lock(lock, NULL);
+ if (!err) {
+ dummy_load(stress);
+ ww_mutex_unlock(lock);
+ } else {
+ pr_err_once("stress (%s) failed with %d\n",
+ __func__, err);
+ break;
+ }
+ } while (!time_after(jiffies, stress->timeout));
+
+ kfree(stress);
+}
+
+#define STRESS_INORDER BIT(0)
+#define STRESS_REORDER BIT(1)
+#define STRESS_ONE BIT(2)
+#define STRESS_ALL (STRESS_INORDER | STRESS_REORDER | STRESS_ONE)
+
+static int stress(int nlocks, int nthreads, unsigned int flags)
+{
+ struct ww_mutex *locks;
+ int n;
+
+ locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL);
+ if (!locks)
+ return -ENOMEM;
+
+ for (n = 0; n < nlocks; n++)
+ ww_mutex_init(&locks[n], &ww_class);
+
+ for (n = 0; nthreads; n++) {
+ struct stress *stress;
+ void (*fn)(struct work_struct *work);
+
+ fn = NULL;
+ switch (n & 3) {
+ case 0:
+ if (flags & STRESS_INORDER)
+ fn = stress_inorder_work;
+ break;
+ case 1:
+ if (flags & STRESS_REORDER)
+ fn = stress_reorder_work;
+ break;
+ case 2:
+ if (flags & STRESS_ONE)
+ fn = stress_one_work;
+ break;
+ }
+
+ if (!fn)
+ continue;
+
+ stress = kmalloc(sizeof(*stress), GFP_KERNEL);
+ if (!stress)
+ break;
+
+ INIT_WORK(&stress->work, fn);
+ stress->locks = locks;
+ stress->nlocks = nlocks;
+ stress->timeout = jiffies + 2*HZ;
+
+ queue_work(wq, &stress->work);
+ nthreads--;
+ }
+
+ flush_workqueue(wq);
+
+ for (n = 0; n < nlocks; n++)
+ ww_mutex_destroy(&locks[n]);
+ kfree(locks);
+
+ return 0;
+}
+
+static int __init test_ww_mutex_init(void)
+{
+ int ncpus = num_online_cpus();
+ int ret;
+
+ wq = alloc_workqueue("test-ww_mutex", WQ_UNBOUND, 0);
+ if (!wq)
+ return -ENOMEM;
+
+ ret = test_mutex();
+ if (ret)
+ return ret;
+
+ ret = test_aa();
+ if (ret)
+ return ret;
+
+ ret = test_abba(false);
+ if (ret)
+ return ret;
+
+ ret = test_abba(true);
+ if (ret)
+ return ret;
+
+ ret = test_cycle(ncpus);
+ if (ret)
+ return ret;
+
+ ret = stress(16, 2*ncpus, STRESS_INORDER);
+ if (ret)
+ return ret;
+
+ ret = stress(16, 2*ncpus, STRESS_REORDER);
+ if (ret)
+ return ret;
+
+ ret = stress(4095, hweight32(STRESS_ALL)*ncpus, STRESS_ALL);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void __exit test_ww_mutex_exit(void)
+{
+ destroy_workqueue(wq);
+}
+
+module_init(test_ww_mutex_init);
+module_exit(test_ww_mutex_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel Corporation");