aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS16
-rw-r--r--arch/alpha/include/asm/rwsem.h68
-rw-r--r--arch/ia64/include/asm/mutex.h2
-rw-r--r--arch/ia64/include/asm/rwsem.h31
-rw-r--r--arch/powerpc/include/asm/mutex.h2
-rw-r--r--arch/s390/include/asm/rwsem.h37
-rw-r--r--arch/x86/include/asm/mutex_32.h2
-rw-r--r--arch/x86/include/asm/mutex_64.h6
-rw-r--r--arch/x86/include/asm/rwsem.h18
-rw-r--r--include/asm-generic/mutex-dec.h2
-rw-r--r--include/asm-generic/mutex-xchg.h6
-rw-r--r--include/asm-generic/qspinlock.h5
-rw-r--r--include/asm-generic/rwsem.h22
-rw-r--r--include/linux/compiler.h4
-rw-r--r--include/linux/percpu-refcount.h12
-rw-r--r--include/linux/rwsem.h8
-rw-r--r--kernel/locking/lockdep.c13
-rw-r--r--kernel/locking/mutex-debug.h4
-rw-r--r--kernel/locking/mutex.h10
-rw-r--r--kernel/locking/qspinlock.c72
-rw-r--r--kernel/locking/rtmutex.c2
-rw-r--r--kernel/locking/rwsem-xadd.c194
-rw-r--r--kernel/locking/rwsem.c8
-rw-r--r--kernel/locking/rwsem.h52
-rw-r--r--kernel/rcu/tree.c1
25 files changed, 334 insertions, 263 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 16700e4fcc4a..2223d5b02053 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7007,15 +7007,23 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/
S: Maintained
F: drivers/media/usb/dvb-usb-v2/lmedm04*
-LOCKDEP AND LOCKSTAT
+LOCKING PRIMITIVES
M: Peter Zijlstra <[email protected]>
M: Ingo Molnar <[email protected]>
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/locking
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
S: Maintained
-F: Documentation/locking/lockdep*.txt
-F: Documentation/locking/lockstat.txt
+F: Documentation/locking/
F: include/linux/lockdep.h
+F: include/linux/spinlock*.h
+F: arch/*/include/asm/spinlock*.h
+F: include/linux/rwlock*.h
+F: include/linux/mutex*.h
+F: arch/*/include/asm/mutex*.h
+F: include/linux/rwsem*.h
+F: arch/*/include/asm/rwsem.h
+F: include/linux/seqlock.h
+F: lib/locking*.[ch]
F: kernel/locking/
LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks)
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
index 0131a7058778..77873d0ad293 100644
--- a/arch/alpha/include/asm/rwsem.h
+++ b/arch/alpha/include/asm/rwsem.h
@@ -25,8 +25,8 @@ static inline void __down_read(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
- oldcount = sem->count;
- sem->count += RWSEM_ACTIVE_READ_BIAS;
+ oldcount = sem->count.counter;
+ sem->count.counter += RWSEM_ACTIVE_READ_BIAS;
#else
long temp;
__asm__ __volatile__(
@@ -52,13 +52,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
{
long old, new, res;
- res = sem->count;
+ res = atomic_long_read(&sem->count);
do {
new = res + RWSEM_ACTIVE_READ_BIAS;
if (new <= 0)
break;
old = res;
- res = cmpxchg(&sem->count, old, new);
+ res = atomic_long_cmpxchg(&sem->count, old, new);
} while (res != old);
return res >= 0 ? 1 : 0;
}
@@ -67,8 +67,8 @@ static inline long ___down_write(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
- oldcount = sem->count;
- sem->count += RWSEM_ACTIVE_WRITE_BIAS;
+ oldcount = sem->count.counter;
+ sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS;
#else
long temp;
__asm__ __volatile__(
@@ -106,7 +106,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem)
*/
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
- long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+ long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
RWSEM_ACTIVE_WRITE_BIAS);
if (ret == RWSEM_UNLOCKED_VALUE)
return 1;
@@ -117,8 +117,8 @@ static inline void __up_read(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
- oldcount = sem->count;
- sem->count -= RWSEM_ACTIVE_READ_BIAS;
+ oldcount = sem->count.counter;
+ sem->count.counter -= RWSEM_ACTIVE_READ_BIAS;
#else
long temp;
__asm__ __volatile__(
@@ -142,8 +142,8 @@ static inline void __up_write(struct rw_semaphore *sem)
{
long count;
#ifndef CONFIG_SMP
- sem->count -= RWSEM_ACTIVE_WRITE_BIAS;
- count = sem->count;
+ sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS;
+ count = sem->count.counter;
#else
long temp;
__asm__ __volatile__(
@@ -171,8 +171,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
{
long oldcount;
#ifndef CONFIG_SMP
- oldcount = sem->count;
- sem->count -= RWSEM_WAITING_BIAS;
+ oldcount = sem->count.counter;
+ sem->count.counter -= RWSEM_WAITING_BIAS;
#else
long temp;
__asm__ __volatile__(
@@ -191,47 +191,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
rwsem_downgrade_wake(sem);
}
-static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem)
-{
-#ifndef CONFIG_SMP
- sem->count += val;
-#else
- long temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%2,%0\n"
- " stq_c %0,%1\n"
- " beq %0,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (temp), "=m" (sem->count)
- :"Ir" (val), "m" (sem->count));
-#endif
-}
-
-static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
-{
-#ifndef CONFIG_SMP
- sem->count += val;
- return sem->count;
-#else
- long ret, temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%3,%2\n"
- " addq %0,%3,%0\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (ret), "=m" (sem->count), "=&r" (temp)
- :"Ir" (val), "m" (sem->count));
-
- return ret;
-#endif
-}
-
#endif /* __KERNEL__ */
#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h
index f41e66d65e31..28cb819e0ff9 100644
--- a/arch/ia64/include/asm/mutex.h
+++ b/arch/ia64/include/asm/mutex.h
@@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (cmpxchg_acq(count, 1, 0) == 1)
+ if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1)
return 1;
return 0;
}
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
index 8b23e070b844..8fa98dd303b4 100644
--- a/arch/ia64/include/asm/rwsem.h
+++ b/arch/ia64/include/asm/rwsem.h
@@ -40,7 +40,7 @@
static inline void
__down_read (struct rw_semaphore *sem)
{
- long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1);
+ long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1);
if (result < 0)
rwsem_down_read_failed(sem);
@@ -55,9 +55,9 @@ ___down_write (struct rw_semaphore *sem)
long old, new;
do {
- old = sem->count;
+ old = atomic_long_read(&sem->count);
new = old + RWSEM_ACTIVE_WRITE_BIAS;
- } while (cmpxchg_acq(&sem->count, old, new) != old);
+ } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old);
return old;
}
@@ -85,7 +85,7 @@ __down_write_killable (struct rw_semaphore *sem)
static inline void
__up_read (struct rw_semaphore *sem)
{
- long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1);
+ long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1);
if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
rwsem_wake(sem);
@@ -100,9 +100,9 @@ __up_write (struct rw_semaphore *sem)
long old, new;
do {
- old = sem->count;
+ old = atomic_long_read(&sem->count);
new = old - RWSEM_ACTIVE_WRITE_BIAS;
- } while (cmpxchg_rel(&sem->count, old, new) != old);
+ } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
rwsem_wake(sem);
@@ -115,8 +115,8 @@ static inline int
__down_read_trylock (struct rw_semaphore *sem)
{
long tmp;
- while ((tmp = sem->count) >= 0) {
- if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) {
+ while ((tmp = atomic_long_read(&sem->count)) >= 0) {
+ if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) {
return 1;
}
}
@@ -129,8 +129,8 @@ __down_read_trylock (struct rw_semaphore *sem)
static inline int
__down_write_trylock (struct rw_semaphore *sem)
{
- long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
+ long tmp = atomic_long_cmpxchg_acquire(&sem->count,
+ RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS);
return tmp == RWSEM_UNLOCKED_VALUE;
}
@@ -143,19 +143,12 @@ __downgrade_write (struct rw_semaphore *sem)
long old, new;
do {
- old = sem->count;
+ old = atomic_long_read(&sem->count);
new = old - RWSEM_WAITING_BIAS;
- } while (cmpxchg_rel(&sem->count, old, new) != old);
+ } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
if (old < 0)
rwsem_downgrade_wake(sem);
}
-/*
- * Implement atomic add functionality. These used to be "inline" functions, but GCC v3.1
- * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd.
- */
-#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count))
-#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count))
-
#endif /* _ASM_IA64_RWSEM_H */
diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h
index 127ab23e1f6c..078155fa1189 100644
--- a/arch/powerpc/include/asm/mutex.h
+++ b/arch/powerpc/include/asm/mutex.h
@@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1))
return 1;
return 0;
}
diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h
index c75e4471e618..597e7e96b59e 100644
--- a/arch/s390/include/asm/rwsem.h
+++ b/arch/s390/include/asm/rwsem.h
@@ -207,41 +207,4 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
rwsem_downgrade_wake(sem);
}
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
- signed long old, new;
-
- asm volatile(
- " lg %0,%2\n"
- "0: lgr %1,%0\n"
- " agr %1,%4\n"
- " csg %0,%1,%2\n"
- " jl 0b"
- : "=&d" (old), "=&d" (new), "=Q" (sem->count)
- : "Q" (sem->count), "d" (delta)
- : "cc", "memory");
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
- signed long old, new;
-
- asm volatile(
- " lg %0,%2\n"
- "0: lgr %1,%0\n"
- " agr %1,%4\n"
- " csg %0,%1,%2\n"
- " jl 0b"
- : "=&d" (old), "=&d" (new), "=Q" (sem->count)
- : "Q" (sem->count), "d" (delta)
- : "cc", "memory");
- return new;
-}
-
#endif /* _S390_RWSEM_H */
diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h
index 85e6cda45a02..e9355a84fc67 100644
--- a/arch/x86/include/asm/mutex_32.h
+++ b/arch/x86/include/asm/mutex_32.h
@@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
/* cmpxchg because it never induces a false contention state. */
- if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
return 1;
return 0;
diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h
index 07537a44216e..d9850758464e 100644
--- a/arch/x86/include/asm/mutex_64.h
+++ b/arch/x86/include/asm/mutex_64.h
@@ -118,10 +118,10 @@ do { \
static inline int __mutex_fastpath_trylock(atomic_t *count,
int (*fail_fn)(atomic_t *))
{
- if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1))
return 1;
- else
- return 0;
+
+ return 0;
}
#endif /* _ASM_X86_MUTEX_64_H */
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index 453744c1d347..089ced4edbbc 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -213,23 +213,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
: "memory", "cc");
}
-/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
- asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0"
- : "+m" (sem->count)
- : "er" (delta));
-}
-
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
- return delta + xadd(&sem->count, delta);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_RWSEM_H */
diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h
index fd694cfd678a..c54829d3de37 100644
--- a/include/asm-generic/mutex-dec.h
+++ b/include/asm-generic/mutex-dec.h
@@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1))
+ if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1))
return 1;
return 0;
}
diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h
index a6b4a7bd6ac9..3269ec4e195f 100644
--- a/include/asm-generic/mutex-xchg.h
+++ b/include/asm-generic/mutex-xchg.h
@@ -91,8 +91,12 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
static inline int
__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
{
- int prev = atomic_xchg_acquire(count, 0);
+ int prev;
+ if (atomic_read(count) != 1)
+ return 0;
+
+ prev = atomic_xchg_acquire(count, 0);
if (unlikely(prev < 0)) {
/*
* The lock was marked contended so we must restore that
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 05f05f17a7c2..9f0681bf1e87 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -111,10 +111,9 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
static __always_inline void queued_spin_unlock(struct qspinlock *lock)
{
/*
- * smp_mb__before_atomic() in order to guarantee release semantics
+ * unlock() needs release semantics:
*/
- smp_mb__before_atomic();
- atomic_sub(_Q_LOCKED_VAL, &lock->val);
+ (void)atomic_sub_return_release(_Q_LOCKED_VAL, &lock->val);
}
#endif
diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
index 3fc94a046bf5..5be122e3d326 100644
--- a/include/asm-generic/rwsem.h
+++ b/include/asm-generic/rwsem.h
@@ -41,8 +41,8 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
{
long tmp;
- while ((tmp = sem->count) >= 0) {
- if (tmp == cmpxchg_acquire(&sem->count, tmp,
+ while ((tmp = atomic_long_read(&sem->count)) >= 0) {
+ if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp,
tmp + RWSEM_ACTIVE_READ_BIAS)) {
return 1;
}
@@ -79,7 +79,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
{
long tmp;
- tmp = cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
+ tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
RWSEM_ACTIVE_WRITE_BIAS);
return tmp == RWSEM_UNLOCKED_VALUE;
}
@@ -107,14 +107,6 @@ static inline void __up_write(struct rw_semaphore *sem)
}
/*
- * implement atomic add functionality
- */
-static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem)
-{
- atomic_long_add(delta, (atomic_long_t *)&sem->count);
-}
-
-/*
* downgrade write lock to read lock
*/
static inline void __downgrade_write(struct rw_semaphore *sem)
@@ -134,13 +126,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
rwsem_downgrade_wake(sem);
}
-/*
- * implement exchange and add functionality
- */
-static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem)
-{
- return atomic_long_add_return(delta, (atomic_long_t *)&sem->count);
-}
-
#endif /* __KERNEL__ */
#endif /* _ASM_GENERIC_RWSEM_H */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 793c0829e3a3..06f27fd9d760 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -545,10 +545,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
* Similar to rcu_dereference(), but for situations where the pointed-to
* object's lifetime is managed by something other than RCU. That
* "something other" might be reference counting or simple immortality.
+ *
+ * The seemingly unused void * variable is to validate @p is indeed a pointer
+ * type. All pointer types silently cast to void *.
*/
#define lockless_dereference(p) \
({ \
typeof(p) _________p1 = READ_ONCE(p); \
+ __maybe_unused const void * const _________p2 = _________p1; \
smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
(_________p1); \
})
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 84f542df7ff5..1c7eec09e5eb 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -136,14 +136,12 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
* used as a pointer. If the compiler generates a separate fetch
* when using it as a pointer, __PERCPU_REF_ATOMIC may be set in
* between contaminating the pointer value, meaning that
- * ACCESS_ONCE() is required when fetching it.
- *
- * Also, we need a data dependency barrier to be paired with
- * smp_store_release() in __percpu_ref_switch_to_percpu().
- *
- * Use lockless deref which contains both.
+ * READ_ONCE() is required when fetching it.
*/
- percpu_ptr = lockless_dereference(ref->percpu_count_ptr);
+ percpu_ptr = READ_ONCE(ref->percpu_count_ptr);
+
+ /* paired with smp_store_release() in __percpu_ref_switch_to_percpu() */
+ smp_read_barrier_depends();
/*
* Theoretically, the following could test just ATOMIC; however,
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index d37fbb34d06f..dd1d14250340 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -23,10 +23,11 @@ struct rw_semaphore;
#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
#include <linux/rwsem-spinlock.h> /* use a generic implementation */
+#define __RWSEM_INIT_COUNT(name) .count = RWSEM_UNLOCKED_VALUE
#else
/* All arch specific implementations share the same struct */
struct rw_semaphore {
- long count;
+ atomic_long_t count;
struct list_head wait_list;
raw_spinlock_t wait_lock;
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
@@ -54,9 +55,10 @@ extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
/* In all implementations count != 0 means locked */
static inline int rwsem_is_locked(struct rw_semaphore *sem)
{
- return sem->count != 0;
+ return atomic_long_read(&sem->count) != 0;
}
+#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
#endif
/* Common initializer macros and functions */
@@ -74,7 +76,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
#endif
#define __RWSEM_INITIALIZER(name) \
- { .count = RWSEM_UNLOCKED_VALUE, \
+ { __RWSEM_INIT_COUNT(name), \
.wait_list = LIST_HEAD_INIT((name).wait_list), \
.wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \
__RWSEM_OPT_INIT(name) \
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 81f1a7107c0e..589d763a49b3 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -46,6 +46,7 @@
#include <linux/gfp.h>
#include <linux/kmemcheck.h>
#include <linux/random.h>
+#include <linux/jhash.h>
#include <asm/sections.h>
@@ -309,10 +310,14 @@ static struct hlist_head chainhash_table[CHAINHASH_SIZE];
* It's a 64-bit hash, because it's important for the keys to be
* unique.
*/
-#define iterate_chain_key(key1, key2) \
- (((key1) << MAX_LOCKDEP_KEYS_BITS) ^ \
- ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \
- (key2))
+static inline u64 iterate_chain_key(u64 key, u32 idx)
+{
+ u32 k0 = key, k1 = key >> 32;
+
+ __jhash_mix(idx, k0, k1); /* Macro that modifies arguments! */
+
+ return k0 | (u64)k1 << 32;
+}
void lockdep_off(void)
{
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h
index 0799fd3e4cfa..372e6530180d 100644
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -29,12 +29,12 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
static inline void mutex_set_owner(struct mutex *lock)
{
- lock->owner = current;
+ WRITE_ONCE(lock->owner, current);
}
static inline void mutex_clear_owner(struct mutex *lock)
{
- lock->owner = NULL;
+ WRITE_ONCE(lock->owner, NULL);
}
#define spin_lock_mutex(lock, flags) \
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h
index 5cda397607f2..12f96199441c 100644
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -17,14 +17,20 @@
__list_del((waiter)->list.prev, (waiter)->list.next)
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+/*
+ * The mutex owner can get read and written to locklessly.
+ * We should use WRITE_ONCE when writing the owner value to
+ * avoid store tearing, otherwise, a thread could potentially
+ * read a partially written and incomplete owner value.
+ */
static inline void mutex_set_owner(struct mutex *lock)
{
- lock->owner = current;
+ WRITE_ONCE(lock->owner, current);
}
static inline void mutex_clear_owner(struct mutex *lock)
{
- lock->owner = NULL;
+ WRITE_ONCE(lock->owner, NULL);
}
#else
static inline void mutex_set_owner(struct mutex *lock)
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 5fc8c311b8fe..2f9153b183c9 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -90,7 +90,7 @@ static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
* therefore increment the cpu number by one.
*/
-static inline u32 encode_tail(int cpu, int idx)
+static inline __pure u32 encode_tail(int cpu, int idx)
{
u32 tail;
@@ -103,7 +103,7 @@ static inline u32 encode_tail(int cpu, int idx)
return tail;
}
-static inline struct mcs_spinlock *decode_tail(u32 tail)
+static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
{
int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
@@ -268,6 +268,63 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
#endif
/*
+ * Various notes on spin_is_locked() and spin_unlock_wait(), which are
+ * 'interesting' functions:
+ *
+ * PROBLEM: some architectures have an interesting issue with atomic ACQUIRE
+ * operations in that the ACQUIRE applies to the LOAD _not_ the STORE (ARM64,
+ * PPC). Also qspinlock has a similar issue per construction, the setting of
+ * the locked byte can be unordered acquiring the lock proper.
+ *
+ * This gets to be 'interesting' in the following cases, where the /should/s
+ * end up false because of this issue.
+ *
+ *
+ * CASE 1:
+ *
+ * So the spin_is_locked() correctness issue comes from something like:
+ *
+ * CPU0 CPU1
+ *
+ * global_lock(); local_lock(i)
+ * spin_lock(&G) spin_lock(&L[i])
+ * for (i) if (!spin_is_locked(&G)) {
+ * spin_unlock_wait(&L[i]); smp_acquire__after_ctrl_dep();
+ * return;
+ * }
+ * // deal with fail
+ *
+ * Where it is important CPU1 sees G locked or CPU0 sees L[i] locked such
+ * that there is exclusion between the two critical sections.
+ *
+ * The load from spin_is_locked(&G) /should/ be constrained by the ACQUIRE from
+ * spin_lock(&L[i]), and similarly the load(s) from spin_unlock_wait(&L[i])
+ * /should/ be constrained by the ACQUIRE from spin_lock(&G).
+ *
+ * Similarly, later stuff is constrained by the ACQUIRE from CTRL+RMB.
+ *
+ *
+ * CASE 2:
+ *
+ * For spin_unlock_wait() there is a second correctness issue, namely:
+ *
+ * CPU0 CPU1
+ *
+ * flag = set;
+ * smp_mb(); spin_lock(&l)
+ * spin_unlock_wait(&l); if (!flag)
+ * // add to lockless list
+ * spin_unlock(&l);
+ * // iterate lockless list
+ *
+ * Which wants to ensure that CPU1 will stop adding bits to the list and CPU0
+ * will observe the last entry on the list (if spin_unlock_wait() had ACQUIRE
+ * semantics etc..)
+ *
+ * Where flag /should/ be ordered against the locked store of l.
+ */
+
+/*
* queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
* issuing an _unordered_ store to set _Q_LOCKED_VAL.
*
@@ -455,6 +512,8 @@ queue:
* pending stuff.
*
* p,*,* -> n,*,*
+ *
+ * RELEASE, such that the stores to @node must be complete.
*/
old = xchg_tail(lock, tail);
next = NULL;
@@ -465,6 +524,15 @@ queue:
*/
if (old & _Q_TAIL_MASK) {
prev = decode_tail(old);
+ /*
+ * The above xchg_tail() is also a load of @lock which generates,
+ * through decode_tail(), a pointer.
+ *
+ * The address dependency matches the RELEASE of xchg_tail()
+ * such that the access to @prev must happen after.
+ */
+ smp_read_barrier_depends();
+
WRITE_ONCE(prev->next, node);
pv_wait_node(node, prev);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 3e746607abe5..1ec0f48962b3 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1478,7 +1478,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
*/
int __sched rt_mutex_trylock(struct rt_mutex *lock)
{
- if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
+ if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
return 0;
return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 09e30c6225e5..2031281bb940 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -80,7 +80,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
lockdep_init_map(&sem->dep_map, name, key, 0);
#endif
- sem->count = RWSEM_UNLOCKED_VALUE;
+ atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
raw_spin_lock_init(&sem->wait_lock);
INIT_LIST_HEAD(&sem->wait_list);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
@@ -114,12 +114,16 @@ enum rwsem_wake_type {
* - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
* - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
* - there must be someone on the queue
- * - the spinlock must be held by the caller
+ * - the wait_lock must be held by the caller
+ * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
+ * to actually wakeup the blocked task(s) and drop the reference count,
+ * preferably when the wait_lock is released
* - woken process blocks are discarded from the list after having task zeroed
- * - writers are only woken if downgrading is false
+ * - writers are only marked woken if downgrading is false
*/
static struct rw_semaphore *
-__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
+__rwsem_mark_wake(struct rw_semaphore *sem,
+ enum rwsem_wake_type wake_type, struct wake_q_head *wake_q)
{
struct rwsem_waiter *waiter;
struct task_struct *tsk;
@@ -128,13 +132,16 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
- if (wake_type == RWSEM_WAKE_ANY)
- /* Wake writer at the front of the queue, but do not
- * grant it the lock yet as we want other writers
- * to be able to steal it. Readers, on the other hand,
- * will block as they will notice the queued writer.
+ if (wake_type == RWSEM_WAKE_ANY) {
+ /*
+ * Mark writer at the front of the queue for wakeup.
+ * Until the task is actually later awoken later by
+ * the caller, other writers are able to steal it.
+ * Readers, on the other hand, will block as they
+ * will notice the queued writer.
*/
- wake_up_process(waiter->task);
+ wake_q_add(wake_q, waiter->task);
+ }
goto out;
}
@@ -146,15 +153,27 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
if (wake_type != RWSEM_WAKE_READ_OWNED) {
adjustment = RWSEM_ACTIVE_READ_BIAS;
try_reader_grant:
- oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
+ oldcount = atomic_long_add_return(adjustment, &sem->count) - adjustment;
+
if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
- /* A writer stole the lock. Undo our reader grant. */
- if (rwsem_atomic_update(-adjustment, sem) &
- RWSEM_ACTIVE_MASK)
+ /*
+ * If the count is still less than RWSEM_WAITING_BIAS
+ * after removing the adjustment, it is assumed that
+ * a writer has stolen the lock. We have to undo our
+ * reader grant.
+ */
+ if (atomic_long_add_return(-adjustment, &sem->count) <
+ RWSEM_WAITING_BIAS)
goto out;
/* Last active locker left. Retry waking readers. */
goto try_reader_grant;
}
+ /*
+ * It is not really necessary to set it to reader-owned here,
+ * but it gives the spinners an early indication that the
+ * readers now have the lock.
+ */
+ rwsem_set_reader_owned(sem);
}
/* Grant an infinite number of read locks to the readers at the front
@@ -179,7 +198,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
adjustment -= RWSEM_WAITING_BIAS;
if (adjustment)
- rwsem_atomic_add(adjustment, sem);
+ atomic_long_add(adjustment, &sem->count);
next = sem->wait_list.next;
loop = woken;
@@ -187,17 +206,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
waiter = list_entry(next, struct rwsem_waiter, list);
next = waiter->list.next;
tsk = waiter->task;
+
+ wake_q_add(wake_q, tsk);
/*
- * Make sure we do not wakeup the next reader before
- * setting the nil condition to grant the next reader;
- * otherwise we could miss the wakeup on the other
- * side and end up sleeping again. See the pairing
- * in rwsem_down_read_failed().
+ * Ensure that the last operation is setting the reader
+ * waiter to nil such that rwsem_down_read_failed() cannot
+ * race with do_exit() by always holding a reference count
+ * to the task to wakeup.
*/
- smp_mb();
- waiter->task = NULL;
- wake_up_process(tsk);
- put_task_struct(tsk);
+ smp_store_release(&waiter->task, NULL);
} while (--loop);
sem->wait_list.next = next;
@@ -216,11 +233,11 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
struct rwsem_waiter waiter;
struct task_struct *tsk = current;
+ WAKE_Q(wake_q);
/* set up my own style of waitqueue */
waiter.task = tsk;
waiter.type = RWSEM_WAITING_FOR_READ;
- get_task_struct(tsk);
raw_spin_lock_irq(&sem->wait_lock);
if (list_empty(&sem->wait_list))
@@ -228,7 +245,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
list_add_tail(&waiter.list, &sem->wait_list);
/* we're now waiting on the lock, but no longer actively locking */
- count = rwsem_atomic_update(adjustment, sem);
+ count = atomic_long_add_return(adjustment, &sem->count);
/* If there are no active locks, wake the front queued process(es).
*
@@ -238,9 +255,10 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
if (count == RWSEM_WAITING_BIAS ||
(count > RWSEM_WAITING_BIAS &&
adjustment != -RWSEM_ACTIVE_READ_BIAS))
- sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+ sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
+ wake_up_q(&wake_q);
/* wait to be given the lock */
while (true) {
@@ -255,17 +273,29 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
}
EXPORT_SYMBOL(rwsem_down_read_failed);
+/*
+ * This function must be called with the sem->wait_lock held to prevent
+ * race conditions between checking the rwsem wait list and setting the
+ * sem->count accordingly.
+ */
static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
{
/*
- * Try acquiring the write lock. Check count first in order
- * to reduce unnecessary expensive cmpxchg() operations.
+ * Avoid trying to acquire write lock if count isn't RWSEM_WAITING_BIAS.
*/
- if (count == RWSEM_WAITING_BIAS &&
- cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS,
- RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
- if (!list_is_singular(&sem->wait_list))
- rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+ if (count != RWSEM_WAITING_BIAS)
+ return false;
+
+ /*
+ * Acquire the lock by trying to set it to ACTIVE_WRITE_BIAS. If there
+ * are other tasks on the wait list, we need to add on WAITING_BIAS.
+ */
+ count = list_is_singular(&sem->wait_list) ?
+ RWSEM_ACTIVE_WRITE_BIAS :
+ RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS;
+
+ if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count)
+ == RWSEM_WAITING_BIAS) {
rwsem_set_owner(sem);
return true;
}
@@ -279,13 +309,13 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
*/
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
- long old, count = READ_ONCE(sem->count);
+ long old, count = atomic_long_read(&sem->count);
while (true) {
if (!(count == 0 || count == RWSEM_WAITING_BIAS))
return false;
- old = cmpxchg_acquire(&sem->count, count,
+ old = atomic_long_cmpxchg_acquire(&sem->count, count,
count + RWSEM_ACTIVE_WRITE_BIAS);
if (old == count) {
rwsem_set_owner(sem);
@@ -306,16 +336,11 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
rcu_read_lock();
owner = READ_ONCE(sem->owner);
- if (!owner) {
- long count = READ_ONCE(sem->count);
+ if (!rwsem_owner_is_writer(owner)) {
/*
- * If sem->owner is not set, yet we have just recently entered the
- * slowpath with the lock being active, then there is a possibility
- * reader(s) may have the lock. To be safe, bail spinning in these
- * situations.
+ * Don't spin if the rwsem is readers owned.
*/
- if (count & RWSEM_ACTIVE_MASK)
- ret = false;
+ ret = !rwsem_owner_is_reader(owner);
goto done;
}
@@ -325,10 +350,15 @@ done:
return ret;
}
-static noinline
-bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
+/*
+ * Return true only if we can still spin on the owner field of the rwsem.
+ */
+static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem)
{
- long count;
+ struct task_struct *owner = READ_ONCE(sem->owner);
+
+ if (!rwsem_owner_is_writer(owner))
+ goto out;
rcu_read_lock();
while (sem->owner == owner) {
@@ -349,22 +379,16 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
cpu_relax_lowlatency();
}
rcu_read_unlock();
-
- if (READ_ONCE(sem->owner))
- return true; /* new owner, continue spinning */
-
+out:
/*
- * When the owner is not set, the lock could be free or
- * held by readers. Check the counter to verify the
- * state.
+ * If there is a new owner or the owner is not set, we continue
+ * spinning.
*/
- count = READ_ONCE(sem->count);
- return (count == 0 || count == RWSEM_WAITING_BIAS);
+ return !rwsem_owner_is_reader(READ_ONCE(sem->owner));
}
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
- struct task_struct *owner;
bool taken = false;
preempt_disable();
@@ -376,12 +400,17 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
if (!osq_lock(&sem->osq))
goto done;
- while (true) {
- owner = READ_ONCE(sem->owner);
- if (owner && !rwsem_spin_on_owner(sem, owner))
- break;
-
- /* wait_lock will be acquired if write_lock is obtained */
+ /*
+ * Optimistically spin on the owner field and attempt to acquire the
+ * lock whenever the owner changes. Spinning will be stopped when:
+ * 1) the owning writer isn't running; or
+ * 2) readers own the lock as we can't determine if they are
+ * actively running or not.
+ */
+ while (rwsem_spin_on_owner(sem)) {
+ /*
+ * Try to acquire the lock
+ */
if (rwsem_try_write_lock_unqueued(sem)) {
taken = true;
break;
@@ -393,7 +422,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
* we're an RT task that will live-lock because we won't let
* the owner complete.
*/
- if (!owner && (need_resched() || rt_task(current)))
+ if (!sem->owner && (need_resched() || rt_task(current)))
break;
/*
@@ -440,9 +469,10 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
bool waiting = true; /* any queued threads before us */
struct rwsem_waiter waiter;
struct rw_semaphore *ret = sem;
+ WAKE_Q(wake_q);
/* undo write bias from down_write operation, stop active locking */
- count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
+ count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count);
/* do optimistic spinning and steal lock if possible */
if (rwsem_optimistic_spin(sem))
@@ -465,18 +495,29 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
/* we're now waiting on the lock, but no longer actively locking */
if (waiting) {
- count = READ_ONCE(sem->count);
+ count = atomic_long_read(&sem->count);
/*
* If there were already threads queued before us and there are
* no active writers, the lock must be read owned; so we try to
* wake any read locks that were queued ahead of us.
*/
- if (count > RWSEM_WAITING_BIAS)
- sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+ if (count > RWSEM_WAITING_BIAS) {
+ WAKE_Q(wake_q);
+
+ sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
+ /*
+ * The wakeup is normally called _after_ the wait_lock
+ * is released, but given that we are proactively waking
+ * readers we can deal with the wake_q overhead as it is
+ * similar to releasing and taking the wait_lock again
+ * for attempting rwsem_try_write_lock().
+ */
+ wake_up_q(&wake_q);
+ }
} else
- count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+ count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count);
/* wait until we successfully acquire the lock */
set_current_state(state);
@@ -492,7 +533,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
schedule();
set_current_state(state);
- } while ((count = sem->count) & RWSEM_ACTIVE_MASK);
+ } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);
raw_spin_lock_irq(&sem->wait_lock);
}
@@ -507,10 +548,11 @@ out_nolock:
raw_spin_lock_irq(&sem->wait_lock);
list_del(&waiter.list);
if (list_empty(&sem->wait_list))
- rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem);
+ atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
else
- __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+ __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irq(&sem->wait_lock);
+ wake_up_q(&wake_q);
return ERR_PTR(-EINTR);
}
@@ -537,6 +579,7 @@ __visible
struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
{
unsigned long flags;
+ WAKE_Q(wake_q);
/*
* If a spinner is present, it is not necessary to do the wakeup.
@@ -573,9 +616,10 @@ locked:
/* do nothing if list empty */
if (!list_empty(&sem->wait_list))
- sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+ sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+ wake_up_q(&wake_q);
return sem;
}
@@ -590,14 +634,16 @@ __visible
struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
{
unsigned long flags;
+ WAKE_Q(wake_q);
raw_spin_lock_irqsave(&sem->wait_lock, flags);
/* do nothing if list empty */
if (!list_empty(&sem->wait_list))
- sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
+ sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+ wake_up_q(&wake_q);
return sem;
}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 2e853ad93a3a..45ba475d4be3 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -22,6 +22,7 @@ void __sched down_read(struct rw_semaphore *sem)
rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
+ rwsem_set_reader_owned(sem);
}
EXPORT_SYMBOL(down_read);
@@ -33,8 +34,10 @@ int down_read_trylock(struct rw_semaphore *sem)
{
int ret = __down_read_trylock(sem);
- if (ret == 1)
+ if (ret == 1) {
rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
+ rwsem_set_reader_owned(sem);
+ }
return ret;
}
@@ -124,7 +127,7 @@ void downgrade_write(struct rw_semaphore *sem)
* lockdep: a downgraded write will live on as a write
* dependency.
*/
- rwsem_clear_owner(sem);
+ rwsem_set_reader_owned(sem);
__downgrade_write(sem);
}
@@ -138,6 +141,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
+ rwsem_set_reader_owned(sem);
}
EXPORT_SYMBOL(down_read_nested);
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index 870ed9a5b426..a699f4048ba1 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,14 +1,58 @@
+/*
+ * The owner field of the rw_semaphore structure will be set to
+ * RWSEM_READ_OWNED when a reader grabs the lock. A writer will clear
+ * the owner field when it unlocks. A reader, on the other hand, will
+ * not touch the owner field when it unlocks.
+ *
+ * In essence, the owner field now has the following 3 states:
+ * 1) 0
+ * - lock is free or the owner hasn't set the field yet
+ * 2) RWSEM_READER_OWNED
+ * - lock is currently or previously owned by readers (lock is free
+ * or not set by owner yet)
+ * 3) Other non-zero value
+ * - a writer owns the lock
+ */
+#define RWSEM_READER_OWNED ((struct task_struct *)1UL)
+
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+/*
+ * All writes to owner are protected by WRITE_ONCE() to make sure that
+ * store tearing can't happen as optimistic spinners may read and use
+ * the owner value concurrently without lock. Read from owner, however,
+ * may not need READ_ONCE() as long as the pointer value is only used
+ * for comparison and isn't being dereferenced.
+ */
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
- sem->owner = current;
+ WRITE_ONCE(sem->owner, current);
}
static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
- sem->owner = NULL;
+ WRITE_ONCE(sem->owner, NULL);
+}
+
+static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
+{
+ /*
+ * We check the owner value first to make sure that we will only
+ * do a write to the rwsem cacheline when it is really necessary
+ * to minimize cacheline contention.
+ */
+ if (sem->owner != RWSEM_READER_OWNED)
+ WRITE_ONCE(sem->owner, RWSEM_READER_OWNED);
+}
+
+static inline bool rwsem_owner_is_writer(struct task_struct *owner)
+{
+ return owner && owner != RWSEM_READER_OWNED;
}
+static inline bool rwsem_owner_is_reader(struct task_struct *owner)
+{
+ return owner == RWSEM_READER_OWNED;
+}
#else
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
@@ -17,4 +61,8 @@ static inline void rwsem_set_owner(struct rw_semaphore *sem)
static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
}
+
+static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
+{
+}
#endif
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c7f1bc4f817c..b7326893221f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3681,7 +3681,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
(rnp == rnp_root ||
ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
- !mutex_is_locked(&rsp->exp_mutex) &&
mutex_trylock(&rsp->exp_mutex))
goto fastpath;