aboutsummaryrefslogtreecommitdiff
path: root/kernel/locking
diff options
context:
space:
mode:
authorTony Lindgren <[email protected]>2016-03-30 10:36:06 -0700
committerTony Lindgren <[email protected]>2016-03-30 10:36:06 -0700
commit1809de7e7d37c585e01a1bcc583ea92b78fc759d (patch)
tree76c5b35c2b04eafce86a1a729c02ab705eba44bc /kernel/locking
parentebf24414809200915b9ddf7f109bba7c278c8210 (diff)
parent3ca4a238106dedc285193ee47f494a6584b6fd2f (diff)
Merge tag 'for-v4.6-rc/omap-fixes-a' of git://git.kernel.org/pub/scm/linux/kernel/git/pjw/omap-pending into omap-for-v4.6/fixes
ARM: OMAP2+: first hwmod fix for v4.6-rc Fix a longstanding bug in the hwmod code that could cause hardware SYSCONFIG register values to not match the kernel's idea of what they should be, and that could result in lower performance during IP block idle entry. Basic build, boot, and PM test logs are available here: http://www.pwsan.com/omap/testlogs/omap-hwmod-fixes-a-for-v4.6-rc/20160326231727/
Diffstat (limited to 'kernel/locking')
-rw-r--r--kernel/locking/Makefile3
-rw-r--r--kernel/locking/lockdep.c189
-rw-r--r--kernel/locking/mcs_spinlock.h8
-rw-r--r--kernel/locking/mutex.c5
-rw-r--r--kernel/locking/qspinlock.c7
-rw-r--r--kernel/locking/qspinlock_paravirt.h21
-rw-r--r--kernel/locking/qspinlock_stat.h16
-rw-r--r--kernel/locking/rtmutex.c135
8 files changed, 190 insertions, 194 deletions
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 8e96f6cc2a4a..31322a4275cd 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -1,3 +1,6 @@
+# Any varying coverage in these files is non-deterministic
+# and is generally not a function of system call inputs.
+KCOV_INSTRUMENT := n
obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 60ace56618f6..53ab2f85d77e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -123,8 +123,6 @@ static inline int debug_locks_off_graph_unlock(void)
return ret;
}
-static int lockdep_initialized;
-
unsigned long nr_list_entries;
static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
@@ -150,8 +148,7 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
}
#ifdef CONFIG_LOCK_STAT
-static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
- cpu_lock_stats);
+static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], cpu_lock_stats);
static inline u64 lockstat_clock(void)
{
@@ -292,7 +289,7 @@ LIST_HEAD(all_lock_classes);
#define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS)
#define classhashentry(key) (classhash_table + __classhashfn((key)))
-static struct list_head classhash_table[CLASSHASH_SIZE];
+static struct hlist_head classhash_table[CLASSHASH_SIZE];
/*
* We put the lock dependency chains into a hash-table as well, to cache
@@ -303,7 +300,7 @@ static struct list_head classhash_table[CLASSHASH_SIZE];
#define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS)
#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain)))
-static struct list_head chainhash_table[CHAINHASH_SIZE];
+static struct hlist_head chainhash_table[CHAINHASH_SIZE];
/*
* The hash key of the lock dependency chains is a hash itself too:
@@ -434,19 +431,6 @@ unsigned int max_lockdep_depth;
#ifdef CONFIG_DEBUG_LOCKDEP
/*
- * We cannot printk in early bootup code. Not even early_printk()
- * might work. So we mark any initialization errors and printk
- * about it later on, in lockdep_info().
- */
-static int lockdep_init_error;
-static const char *lock_init_error;
-static unsigned long lockdep_init_trace_data[20];
-static struct stack_trace lockdep_init_trace = {
- .max_entries = ARRAY_SIZE(lockdep_init_trace_data),
- .entries = lockdep_init_trace_data,
-};
-
-/*
* Various lockdep statistics:
*/
DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
@@ -666,23 +650,9 @@ static inline struct lock_class *
look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
{
struct lockdep_subclass_key *key;
- struct list_head *hash_head;
+ struct hlist_head *hash_head;
struct lock_class *class;
-#ifdef CONFIG_DEBUG_LOCKDEP
- /*
- * If the architecture calls into lockdep before initializing
- * the hashes then we'll warn about it later. (we cannot printk
- * right now)
- */
- if (unlikely(!lockdep_initialized)) {
- lockdep_init();
- lockdep_init_error = 1;
- lock_init_error = lock->name;
- save_stack_trace(&lockdep_init_trace);
- }
-#endif
-
if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
debug_locks_off();
printk(KERN_ERR
@@ -719,7 +689,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return NULL;
- list_for_each_entry_rcu(class, hash_head, hash_entry) {
+ hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
if (class->key == key) {
/*
* Huh! same key, different name? Did someone trample
@@ -742,7 +712,7 @@ static inline struct lock_class *
register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
{
struct lockdep_subclass_key *key;
- struct list_head *hash_head;
+ struct hlist_head *hash_head;
struct lock_class *class;
DEBUG_LOCKS_WARN_ON(!irqs_disabled());
@@ -774,7 +744,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
* We have to do the hash-walk again, to avoid races
* with another CPU:
*/
- list_for_each_entry_rcu(class, hash_head, hash_entry) {
+ hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
if (class->key == key)
goto out_unlock_set;
}
@@ -805,7 +775,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
* We use RCU's safe list-add method to make
* parallel walking of the hash-list safe:
*/
- list_add_tail_rcu(&class->hash_entry, hash_head);
+ hlist_add_head_rcu(&class->hash_entry, hash_head);
/*
* Add it to the global list of classes:
*/
@@ -1822,7 +1792,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
*/
static int
check_prev_add(struct task_struct *curr, struct held_lock *prev,
- struct held_lock *next, int distance, int trylock_loop)
+ struct held_lock *next, int distance, int *stack_saved)
{
struct lock_list *entry;
int ret;
@@ -1883,8 +1853,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
}
}
- if (!trylock_loop && !save_trace(&trace))
- return 0;
+ if (!*stack_saved) {
+ if (!save_trace(&trace))
+ return 0;
+ *stack_saved = 1;
+ }
/*
* Ok, all validations passed, add the new lock
@@ -1907,6 +1880,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
* Debugging printouts:
*/
if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
+ /* We drop graph lock, so another thread can overwrite trace. */
+ *stack_saved = 0;
graph_unlock();
printk("\n new dependency: ");
print_lock_name(hlock_class(prev));
@@ -1929,7 +1904,7 @@ static int
check_prevs_add(struct task_struct *curr, struct held_lock *next)
{
int depth = curr->lockdep_depth;
- int trylock_loop = 0;
+ int stack_saved = 0;
struct held_lock *hlock;
/*
@@ -1956,7 +1931,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
*/
if (hlock->read != 2 && hlock->check) {
if (!check_prev_add(curr, hlock, next,
- distance, trylock_loop))
+ distance, &stack_saved))
return 0;
/*
* Stop after the first non-trylock entry,
@@ -1979,7 +1954,6 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
if (curr->held_locks[depth].irq_context !=
curr->held_locks[depth-1].irq_context)
break;
- trylock_loop = 1;
}
return 1;
out_bug:
@@ -2007,6 +1981,53 @@ struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i)
}
/*
+ * Returns the index of the first held_lock of the current chain
+ */
+static inline int get_first_held_lock(struct task_struct *curr,
+ struct held_lock *hlock)
+{
+ int i;
+ struct held_lock *hlock_curr;
+
+ for (i = curr->lockdep_depth - 1; i >= 0; i--) {
+ hlock_curr = curr->held_locks + i;
+ if (hlock_curr->irq_context != hlock->irq_context)
+ break;
+
+ }
+
+ return ++i;
+}
+
+/*
+ * Checks whether the chain and the current held locks are consistent
+ * in depth and also in content. If they are not it most likely means
+ * that there was a collision during the calculation of the chain_key.
+ * Returns: 0 not passed, 1 passed
+ */
+static int check_no_collision(struct task_struct *curr,
+ struct held_lock *hlock,
+ struct lock_chain *chain)
+{
+#ifdef CONFIG_DEBUG_LOCKDEP
+ int i, j, id;
+
+ i = get_first_held_lock(curr, hlock);
+
+ if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1)))
+ return 0;
+
+ for (j = 0; j < chain->depth - 1; j++, i++) {
+ id = curr->held_locks[i].class_idx - 1;
+
+ if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id))
+ return 0;
+ }
+#endif
+ return 1;
+}
+
+/*
* Look up a dependency chain. If the key is not present yet then
* add it and return 1 - in this case the new dependency chain is
* validated. If the key is already hashed, return 0.
@@ -2017,9 +2038,8 @@ static inline int lookup_chain_cache(struct task_struct *curr,
u64 chain_key)
{
struct lock_class *class = hlock_class(hlock);
- struct list_head *hash_head = chainhashentry(chain_key);
+ struct hlist_head *hash_head = chainhashentry(chain_key);
struct lock_chain *chain;
- struct held_lock *hlock_curr;
int i, j;
/*
@@ -2033,10 +2053,13 @@ static inline int lookup_chain_cache(struct task_struct *curr,
* We can walk it lock-free, because entries only get added
* to the hash:
*/
- list_for_each_entry_rcu(chain, hash_head, entry) {
+ hlist_for_each_entry_rcu(chain, hash_head, entry) {
if (chain->chain_key == chain_key) {
cache_hit:
debug_atomic_inc(chain_lookup_hits);
+ if (!check_no_collision(curr, hlock, chain))
+ return 0;
+
if (very_verbose(class))
printk("\nhash chain already cached, key: "
"%016Lx tail class: [%p] %s\n",
@@ -2057,7 +2080,7 @@ cache_hit:
/*
* We have to walk the chain again locked - to avoid duplicates:
*/
- list_for_each_entry(chain, hash_head, entry) {
+ hlist_for_each_entry(chain, hash_head, entry) {
if (chain->chain_key == chain_key) {
graph_unlock();
goto cache_hit;
@@ -2074,13 +2097,7 @@ cache_hit:
chain = lock_chains + nr_lock_chains++;
chain->chain_key = chain_key;
chain->irq_context = hlock->irq_context;
- /* Find the first held_lock of current chain */
- for (i = curr->lockdep_depth - 1; i >= 0; i--) {
- hlock_curr = curr->held_locks + i;
- if (hlock_curr->irq_context != hlock->irq_context)
- break;
- }
- i++;
+ i = get_first_held_lock(curr, hlock);
chain->depth = curr->lockdep_depth + 1 - i;
if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
chain->base = nr_chain_hlocks;
@@ -2091,7 +2108,7 @@ cache_hit:
}
chain_hlocks[chain->base + j] = class - lock_classes;
}
- list_add_tail_rcu(&chain->entry, hash_head);
+ hlist_add_head_rcu(&chain->entry, hash_head);
debug_atomic_inc(chain_lookup_misses);
inc_chains();
@@ -2168,7 +2185,7 @@ static void check_chain_key(struct task_struct *curr)
{
#ifdef CONFIG_DEBUG_LOCKDEP
struct held_lock *hlock, *prev_hlock = NULL;
- unsigned int i, id;
+ unsigned int i;
u64 chain_key = 0;
for (i = 0; i < curr->lockdep_depth; i++) {
@@ -2185,17 +2202,16 @@ static void check_chain_key(struct task_struct *curr)
(unsigned long long)hlock->prev_chain_key);
return;
}
- id = hlock->class_idx - 1;
/*
* Whoops ran out of static storage again?
*/
- if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+ if (DEBUG_LOCKS_WARN_ON(hlock->class_idx > MAX_LOCKDEP_KEYS))
return;
if (prev_hlock && (prev_hlock->irq_context !=
hlock->irq_context))
chain_key = 0;
- chain_key = iterate_chain_key(chain_key, id);
+ chain_key = iterate_chain_key(chain_key, hlock->class_idx);
prev_hlock = hlock;
}
if (chain_key != curr->curr_chain_key) {
@@ -3073,7 +3089,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
struct task_struct *curr = current;
struct lock_class *class = NULL;
struct held_lock *hlock;
- unsigned int depth, id;
+ unsigned int depth;
int chain_head = 0;
int class_idx;
u64 chain_key;
@@ -3176,11 +3192,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
* The 'key ID' is what is the most compact key value to drive
* the hash, not class->key.
*/
- id = class - lock_classes;
/*
* Whoops, we did it again.. ran straight out of our static allocation.
*/
- if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+ if (DEBUG_LOCKS_WARN_ON(class_idx > MAX_LOCKDEP_KEYS))
return 0;
chain_key = curr->curr_chain_key;
@@ -3198,7 +3213,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
chain_key = 0;
chain_head = 1;
}
- chain_key = iterate_chain_key(chain_key, id);
+ chain_key = iterate_chain_key(chain_key, class_idx);
if (nest_lock && !__lock_is_held(nest_lock))
return print_lock_nested_lock_not_held(curr, hlock, ip);
@@ -3875,7 +3890,7 @@ void lockdep_reset(void)
nr_process_chains = 0;
debug_locks = 1;
for (i = 0; i < CHAINHASH_SIZE; i++)
- INIT_LIST_HEAD(chainhash_table + i);
+ INIT_HLIST_HEAD(chainhash_table + i);
raw_local_irq_restore(flags);
}
@@ -3894,7 +3909,7 @@ static void zap_class(struct lock_class *class)
/*
* Unhash the class and remove it from the all_lock_classes list:
*/
- list_del_rcu(&class->hash_entry);
+ hlist_del_rcu(&class->hash_entry);
list_del_rcu(&class->lock_entry);
RCU_INIT_POINTER(class->key, NULL);
@@ -3917,7 +3932,7 @@ static inline int within(const void *addr, void *start, unsigned long size)
void lockdep_free_key_range(void *start, unsigned long size)
{
struct lock_class *class;
- struct list_head *head;
+ struct hlist_head *head;
unsigned long flags;
int i;
int locked;
@@ -3930,9 +3945,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
*/
for (i = 0; i < CLASSHASH_SIZE; i++) {
head = classhash_table + i;
- if (list_empty(head))
- continue;
- list_for_each_entry_rcu(class, head, hash_entry) {
+ hlist_for_each_entry_rcu(class, head, hash_entry) {
if (within(class->key, start, size))
zap_class(class);
else if (within(class->name, start, size))
@@ -3962,7 +3975,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
void lockdep_reset_lock(struct lockdep_map *lock)
{
struct lock_class *class;
- struct list_head *head;
+ struct hlist_head *head;
unsigned long flags;
int i, j;
int locked;
@@ -3987,9 +4000,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
locked = graph_lock();
for (i = 0; i < CLASSHASH_SIZE; i++) {
head = classhash_table + i;
- if (list_empty(head))
- continue;
- list_for_each_entry_rcu(class, head, hash_entry) {
+ hlist_for_each_entry_rcu(class, head, hash_entry) {
int match = 0;
for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)
@@ -4013,28 +4024,6 @@ out_restore:
raw_local_irq_restore(flags);
}
-void lockdep_init(void)
-{
- int i;
-
- /*
- * Some architectures have their own start_kernel()
- * code which calls lockdep_init(), while we also
- * call lockdep_init() from the start_kernel() itself,
- * and we want to initialize the hashes only once:
- */
- if (lockdep_initialized)
- return;
-
- for (i = 0; i < CLASSHASH_SIZE; i++)
- INIT_LIST_HEAD(classhash_table + i);
-
- for (i = 0; i < CHAINHASH_SIZE; i++)
- INIT_LIST_HEAD(chainhash_table + i);
-
- lockdep_initialized = 1;
-}
-
void __init lockdep_info(void)
{
printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
@@ -4061,14 +4050,6 @@ void __init lockdep_info(void)
printk(" per task-struct memory footprint: %lu bytes\n",
sizeof(struct held_lock) * MAX_LOCK_DEPTH);
-
-#ifdef CONFIG_DEBUG_LOCKDEP
- if (lockdep_init_error) {
- printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error);
- printk("Call stack leading to lockdep invocation was:\n");
- print_stack_trace(&lockdep_init_trace, 0);
- }
-#endif
}
static void
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index 5b9102a47ea5..c835270f0c2f 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -67,7 +67,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
node->locked = 0;
node->next = NULL;
- prev = xchg_acquire(lock, node);
+ /*
+ * We rely on the full barrier with global transitivity implied by the
+ * below xchg() to order the initialization stores above against any
+ * observation of @node. And to provide the ACQUIRE ordering associated
+ * with a LOCK primitive.
+ */
+ prev = xchg(lock, node);
if (likely(prev == NULL)) {
/*
* Lock acquired, don't need to set node->locked to 1. Threads
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 0551c219c40e..e364b424b019 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -716,6 +716,7 @@ static inline void
__mutex_unlock_common_slowpath(struct mutex *lock, int nested)
{
unsigned long flags;
+ WAKE_Q(wake_q);
/*
* As a performance measurement, release the lock before doing other
@@ -743,11 +744,11 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
struct mutex_waiter, list);
debug_mutex_wake_waiter(lock, waiter);
-
- wake_up_process(waiter->task);
+ wake_q_add(&wake_q, waiter->task);
}
spin_unlock_mutex(&lock->wait_lock, flags);
+ wake_up_q(&wake_q);
}
/*
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 393d1874b9e0..ce2f75e32ae1 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -358,8 +358,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* sequentiality; this is because not all clear_pending_set_locked()
* implementations imply full barriers.
*/
- while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
- cpu_relax();
+ smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));
/*
* take ownership and clear the pending bit.
@@ -435,7 +434,7 @@ queue:
*
* The PV pv_wait_head_or_lock function, if active, will acquire
* the lock and return a non-zero value. So we have to skip the
- * smp_load_acquire() call. As the next PV queue head hasn't been
+ * smp_cond_acquire() call. As the next PV queue head hasn't been
* designated yet, there is no way for the locked value to become
* _Q_SLOW_VAL. So both the set_locked() and the
* atomic_cmpxchg_relaxed() calls will be safe.
@@ -466,7 +465,7 @@ locked:
break;
}
/*
- * The smp_load_acquire() call above has provided the necessary
+ * The smp_cond_acquire() call above has provided the necessary
* acquire semantics required for locking. At most two
* iterations of this loop may be ran.
*/
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 87bb235c3448..21ede57f68b3 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -55,6 +55,11 @@ struct pv_node {
};
/*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
+/*
* By replacing the regular queued_spin_trylock() with the function below,
* it will be called once when a lock waiter enter the PV slowpath before
* being queued. By allowing one lock stealing attempt here when the pending
@@ -65,9 +70,11 @@ struct pv_node {
static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
{
struct __qspinlock *l = (void *)lock;
+ int ret = !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
+ (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
- return !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
- (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
+ qstat_inc(qstat_pv_lock_stealing, ret);
+ return ret;
}
/*
@@ -138,11 +145,6 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
#endif /* _Q_PENDING_BITS == 8 */
/*
- * Include queued spinlock statistics code
- */
-#include "qspinlock_stat.h"
-
-/*
* Lock and MCS node addresses hash table for fast lookup
*
* Hashing is done on a per-cacheline basis to minimize the need to access
@@ -398,6 +400,11 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
if (READ_ONCE(pn->state) == vcpu_hashed)
lp = (struct qspinlock **)1;
+ /*
+ * Tracking # of slowpath locking operations
+ */
+ qstat_inc(qstat_pv_lock_slowpath, true);
+
for (;; waitcnt++) {
/*
* Set correct vCPU state to be used by queue node wait-early
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index 640dcecdd1df..eb2a2c9bc3fc 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -22,6 +22,7 @@
* pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake
* pv_latency_kick - average latency (ns) of vCPU kick operation
* pv_latency_wake - average latency (ns) from vCPU kick to wakeup
+ * pv_lock_slowpath - # of locking operations via the slowpath
* pv_lock_stealing - # of lock stealing operations
* pv_spurious_wakeup - # of spurious wakeups
* pv_wait_again - # of vCPU wait's that happened after a vCPU kick
@@ -45,6 +46,7 @@ enum qlock_stats {
qstat_pv_kick_wake,
qstat_pv_latency_kick,
qstat_pv_latency_wake,
+ qstat_pv_lock_slowpath,
qstat_pv_lock_stealing,
qstat_pv_spurious_wakeup,
qstat_pv_wait_again,
@@ -70,6 +72,7 @@ static const char * const qstat_names[qstat_num + 1] = {
[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
[qstat_pv_latency_kick] = "pv_latency_kick",
[qstat_pv_latency_wake] = "pv_latency_wake",
+ [qstat_pv_lock_slowpath] = "pv_lock_slowpath",
[qstat_pv_lock_stealing] = "pv_lock_stealing",
[qstat_pv_wait_again] = "pv_wait_again",
[qstat_pv_wait_early] = "pv_wait_early",
@@ -279,19 +282,6 @@ static inline void __pv_wait(u8 *ptr, u8 val)
#define pv_kick(c) __pv_kick(c)
#define pv_wait(p, v) __pv_wait(p, v)
-/*
- * PV unfair trylock count tracking function
- */
-static inline int qstat_spin_steal_lock(struct qspinlock *lock)
-{
- int ret = pv_queued_spin_steal_lock(lock);
-
- qstat_inc(qstat_pv_lock_stealing, ret);
- return ret;
-}
-#undef queued_spin_trylock
-#define queued_spin_trylock(l) qstat_spin_steal_lock(l)
-
#else /* CONFIG_QUEUED_LOCK_STAT */
static inline void qstat_inc(enum qlock_stats stat, bool cond) { }
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8251e75dd9c0..3e746607abe5 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -99,13 +99,14 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
* 2) Drop lock->wait_lock
* 3) Try to unlock the lock with cmpxchg
*/
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+ unsigned long flags)
__releases(lock->wait_lock)
{
struct task_struct *owner = rt_mutex_owner(lock);
clear_rt_mutex_waiters(lock);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
/*
* If a new waiter comes in between the unlock and the cmpxchg
* we have two situations:
@@ -147,11 +148,12 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
/*
* Simple slow path only version: lock->owner is protected by lock->wait_lock.
*/
-static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
+static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock,
+ unsigned long flags)
__releases(lock->wait_lock)
{
lock->owner = NULL;
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
return true;
}
#endif
@@ -433,7 +435,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
int ret = 0, depth = 0;
struct rt_mutex *lock;
bool detect_deadlock;
- unsigned long flags;
bool requeue = true;
detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk);
@@ -476,7 +477,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/*
* [1] Task cannot go away as we did a get_task() before !
*/
- raw_spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock_irq(&task->pi_lock);
/*
* [2] Get the waiter on which @task is blocked on.
@@ -560,7 +561,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* operations.
*/
if (!raw_spin_trylock(&lock->wait_lock)) {
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock_irq(&task->pi_lock);
cpu_relax();
goto retry;
}
@@ -591,7 +592,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/*
* No requeue[7] here. Just release @task [8]
*/
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock(&task->pi_lock);
put_task_struct(task);
/*
@@ -599,14 +600,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
* If there is no owner of the lock, end of chain.
*/
if (!rt_mutex_owner(lock)) {
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
return 0;
}
/* [10] Grab the next task, i.e. owner of @lock */
task = rt_mutex_owner(lock);
get_task_struct(task);
- raw_spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock(&task->pi_lock);
/*
* No requeue [11] here. We just do deadlock detection.
@@ -621,8 +622,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
top_waiter = rt_mutex_top_waiter(lock);
/* [13] Drop locks */
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&task->pi_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
/* If owner is not blocked, end of chain. */
if (!next_lock)
@@ -643,7 +644,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
rt_mutex_enqueue(lock, waiter);
/* [8] Release the task */
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock(&task->pi_lock);
put_task_struct(task);
/*
@@ -661,14 +662,14 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
*/
if (prerequeue_top_waiter != rt_mutex_top_waiter(lock))
wake_up_process(rt_mutex_top_waiter(lock)->task);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
return 0;
}
/* [10] Grab the next task, i.e. the owner of @lock */
task = rt_mutex_owner(lock);
get_task_struct(task);
- raw_spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock(&task->pi_lock);
/* [11] requeue the pi waiters if necessary */
if (waiter == rt_mutex_top_waiter(lock)) {
@@ -722,8 +723,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
top_waiter = rt_mutex_top_waiter(lock);
/* [13] Drop the locks */
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock(&task->pi_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
/*
* Make the actual exit decisions [12], based on the stored
@@ -746,7 +747,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
goto again;
out_unlock_pi:
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock_irq(&task->pi_lock);
out_put_task:
put_task_struct(task);
@@ -756,7 +757,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
/*
* Try to take an rt-mutex
*
- * Must be called with lock->wait_lock held.
+ * Must be called with lock->wait_lock held and interrupts disabled
*
* @lock: The lock to be acquired.
* @task: The task which wants to acquire the lock
@@ -766,8 +767,6 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
struct rt_mutex_waiter *waiter)
{
- unsigned long flags;
-
/*
* Before testing whether we can acquire @lock, we set the
* RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all
@@ -852,7 +851,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
* case, but conditionals are more expensive than a redundant
* store.
*/
- raw_spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock(&task->pi_lock);
task->pi_blocked_on = NULL;
/*
* Finish the lock acquisition. @task is the new owner. If
@@ -861,7 +860,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task,
*/
if (rt_mutex_has_waiters(lock))
rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock));
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock(&task->pi_lock);
takeit:
/* We got the lock. */
@@ -883,7 +882,7 @@ takeit:
*
* Prepare waiter and propagate pi chain
*
- * This must be called with lock->wait_lock held.
+ * This must be called with lock->wait_lock held and interrupts disabled
*/
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
@@ -894,7 +893,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
struct rt_mutex_waiter *top_waiter = waiter;
struct rt_mutex *next_lock;
int chain_walk = 0, res;
- unsigned long flags;
/*
* Early deadlock detection. We really don't want the task to
@@ -908,7 +906,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
if (owner == task)
return -EDEADLK;
- raw_spin_lock_irqsave(&task->pi_lock, flags);
+ raw_spin_lock(&task->pi_lock);
__rt_mutex_adjust_prio(task);
waiter->task = task;
waiter->lock = lock;
@@ -921,12 +919,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
task->pi_blocked_on = waiter;
- raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+ raw_spin_unlock(&task->pi_lock);
if (!owner)
return 0;
- raw_spin_lock_irqsave(&owner->pi_lock, flags);
+ raw_spin_lock(&owner->pi_lock);
if (waiter == rt_mutex_top_waiter(lock)) {
rt_mutex_dequeue_pi(owner, top_waiter);
rt_mutex_enqueue_pi(owner, waiter);
@@ -941,7 +939,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
/* Store the lock on which owner is blocked or NULL */
next_lock = task_blocked_on_lock(owner);
- raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+ raw_spin_unlock(&owner->pi_lock);
/*
* Even if full deadlock detection is on, if the owner is not
* blocked itself, we can avoid finding this out in the chain
@@ -957,12 +955,12 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
*/
get_task_struct(owner);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
res = rt_mutex_adjust_prio_chain(owner, chwalk, lock,
next_lock, waiter, task);
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irq(&lock->wait_lock);
return res;
}
@@ -971,15 +969,14 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
* Remove the top waiter from the current tasks pi waiter tree and
* queue it up.
*
- * Called with lock->wait_lock held.
+ * Called with lock->wait_lock held and interrupts disabled.
*/
static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
struct rt_mutex *lock)
{
struct rt_mutex_waiter *waiter;
- unsigned long flags;
- raw_spin_lock_irqsave(&current->pi_lock, flags);
+ raw_spin_lock(&current->pi_lock);
waiter = rt_mutex_top_waiter(lock);
@@ -1001,7 +998,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
*/
lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
- raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+ raw_spin_unlock(&current->pi_lock);
wake_q_add(wake_q, waiter->task);
}
@@ -1009,7 +1006,7 @@ static void mark_wakeup_next_waiter(struct wake_q_head *wake_q,
/*
* Remove a waiter from a lock and give up
*
- * Must be called with lock->wait_lock held and
+ * Must be called with lock->wait_lock held and interrupts disabled. I must
* have just failed to try_to_take_rt_mutex().
*/
static void remove_waiter(struct rt_mutex *lock,
@@ -1018,12 +1015,11 @@ static void remove_waiter(struct rt_mutex *lock,
bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock));
struct task_struct *owner = rt_mutex_owner(lock);
struct rt_mutex *next_lock;
- unsigned long flags;
- raw_spin_lock_irqsave(&current->pi_lock, flags);
+ raw_spin_lock(&current->pi_lock);
rt_mutex_dequeue(lock, waiter);
current->pi_blocked_on = NULL;
- raw_spin_unlock_irqrestore(&current->pi_lock, flags);
+ raw_spin_unlock(&current->pi_lock);
/*
* Only update priority if the waiter was the highest priority
@@ -1032,7 +1028,7 @@ static void remove_waiter(struct rt_mutex *lock,
if (!owner || !is_top_waiter)
return;
- raw_spin_lock_irqsave(&owner->pi_lock, flags);
+ raw_spin_lock(&owner->pi_lock);
rt_mutex_dequeue_pi(owner, waiter);
@@ -1044,7 +1040,7 @@ static void remove_waiter(struct rt_mutex *lock,
/* Store the lock on which owner is blocked or NULL */
next_lock = task_blocked_on_lock(owner);
- raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
+ raw_spin_unlock(&owner->pi_lock);
/*
* Don't walk the chain, if the owner task is not blocked
@@ -1056,12 +1052,12 @@ static void remove_waiter(struct rt_mutex *lock,
/* gets dropped in rt_mutex_adjust_prio_chain()! */
get_task_struct(owner);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock,
next_lock, NULL, current);
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irq(&lock->wait_lock);
}
/*
@@ -1097,11 +1093,11 @@ void rt_mutex_adjust_pi(struct task_struct *task)
* __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
* @lock: the rt_mutex to take
* @state: the state the task should block in (TASK_INTERRUPTIBLE
- * or TASK_UNINTERRUPTIBLE)
+ * or TASK_UNINTERRUPTIBLE)
* @timeout: the pre-initialized and started timer, or NULL for none
* @waiter: the pre-initialized rt_mutex_waiter
*
- * lock->wait_lock must be held by the caller.
+ * Must be called with lock->wait_lock held and interrupts disabled
*/
static int __sched
__rt_mutex_slowlock(struct rt_mutex *lock, int state,
@@ -1129,13 +1125,13 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
break;
}
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
schedule();
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irq(&lock->wait_lock);
set_current_state(state);
}
@@ -1172,17 +1168,26 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
enum rtmutex_chainwalk chwalk)
{
struct rt_mutex_waiter waiter;
+ unsigned long flags;
int ret = 0;
debug_rt_mutex_init_waiter(&waiter);
RB_CLEAR_NODE(&waiter.pi_tree_entry);
RB_CLEAR_NODE(&waiter.tree_entry);
- raw_spin_lock(&lock->wait_lock);
+ /*
+ * Technically we could use raw_spin_[un]lock_irq() here, but this can
+ * be called in early boot if the cmpxchg() fast path is disabled
+ * (debug, no architecture support). In this case we will acquire the
+ * rtmutex with lock->wait_lock held. But we cannot unconditionally
+ * enable interrupts in that early boot case. So we need to use the
+ * irqsave/restore variants.
+ */
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
/* Try to acquire the lock again: */
if (try_to_take_rt_mutex(lock, current, NULL)) {
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
return 0;
}
@@ -1211,7 +1216,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
*/
fixup_rt_mutex_waiters(lock);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
/* Remove pending timer: */
if (unlikely(timeout))
@@ -1227,6 +1232,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
*/
static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
{
+ unsigned long flags;
int ret;
/*
@@ -1238,10 +1244,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
return 0;
/*
- * The mutex has currently no owner. Lock the wait lock and
- * try to acquire the lock.
+ * The mutex has currently no owner. Lock the wait lock and try to
+ * acquire the lock. We use irqsave here to support early boot calls.
*/
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
ret = try_to_take_rt_mutex(lock, current, NULL);
@@ -1251,7 +1257,7 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
*/
fixup_rt_mutex_waiters(lock);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
return ret;
}
@@ -1263,7 +1269,10 @@ static inline int rt_mutex_slowtrylock(struct rt_mutex *lock)
static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
struct wake_q_head *wake_q)
{
- raw_spin_lock(&lock->wait_lock);
+ unsigned long flags;
+
+ /* irqsave required to support early boot calls */
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
debug_rt_mutex_unlock(lock);
@@ -1302,10 +1311,10 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
*/
while (!rt_mutex_has_waiters(lock)) {
/* Drops lock->wait_lock ! */
- if (unlock_rt_mutex_safe(lock) == true)
+ if (unlock_rt_mutex_safe(lock, flags) == true)
return false;
/* Relock the rtmutex and try again */
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irqsave(&lock->wait_lock, flags);
}
/*
@@ -1316,7 +1325,7 @@ static bool __sched rt_mutex_slowunlock(struct rt_mutex *lock,
*/
mark_wakeup_next_waiter(wake_q, lock);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
/* check PI boosting */
return true;
@@ -1596,10 +1605,10 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
{
int ret;
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irq(&lock->wait_lock);
if (try_to_take_rt_mutex(lock, task, NULL)) {
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
return 1;
}
@@ -1620,7 +1629,7 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
if (unlikely(ret))
remove_waiter(lock, waiter);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
debug_rt_mutex_print_deadlock(waiter);
@@ -1668,7 +1677,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
{
int ret;
- raw_spin_lock(&lock->wait_lock);
+ raw_spin_lock_irq(&lock->wait_lock);
set_current_state(TASK_INTERRUPTIBLE);
@@ -1684,7 +1693,7 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
*/
fixup_rt_mutex_waiters(lock);
- raw_spin_unlock(&lock->wait_lock);
+ raw_spin_unlock_irq(&lock->wait_lock);
return ret;
}