diff options
-rw-r--r-- | MAINTAINERS | 16 | ||||
-rw-r--r-- | arch/alpha/include/asm/rwsem.h | 68 | ||||
-rw-r--r-- | arch/ia64/include/asm/mutex.h | 2 | ||||
-rw-r--r-- | arch/ia64/include/asm/rwsem.h | 31 | ||||
-rw-r--r-- | arch/powerpc/include/asm/mutex.h | 2 | ||||
-rw-r--r-- | arch/s390/include/asm/rwsem.h | 37 | ||||
-rw-r--r-- | arch/x86/include/asm/mutex_32.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/mutex_64.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/rwsem.h | 18 | ||||
-rw-r--r-- | include/asm-generic/mutex-dec.h | 2 | ||||
-rw-r--r-- | include/asm-generic/mutex-xchg.h | 6 | ||||
-rw-r--r-- | include/asm-generic/qspinlock.h | 5 | ||||
-rw-r--r-- | include/asm-generic/rwsem.h | 22 | ||||
-rw-r--r-- | include/linux/compiler.h | 4 | ||||
-rw-r--r-- | include/linux/percpu-refcount.h | 12 | ||||
-rw-r--r-- | include/linux/rwsem.h | 8 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 13 | ||||
-rw-r--r-- | kernel/locking/mutex-debug.h | 4 | ||||
-rw-r--r-- | kernel/locking/mutex.h | 10 | ||||
-rw-r--r-- | kernel/locking/qspinlock.c | 72 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 2 | ||||
-rw-r--r-- | kernel/locking/rwsem-xadd.c | 194 | ||||
-rw-r--r-- | kernel/locking/rwsem.c | 8 | ||||
-rw-r--r-- | kernel/locking/rwsem.h | 52 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 1 |
25 files changed, 334 insertions, 263 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 16700e4fcc4a..2223d5b02053 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7007,15 +7007,23 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ S: Maintained F: drivers/media/usb/dvb-usb-v2/lmedm04* -LOCKDEP AND LOCKSTAT +LOCKING PRIMITIVES M: Peter Zijlstra <[email protected]> M: Ingo Molnar <[email protected]> -T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/locking +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core S: Maintained -F: Documentation/locking/lockdep*.txt -F: Documentation/locking/lockstat.txt +F: Documentation/locking/ F: include/linux/lockdep.h +F: include/linux/spinlock*.h +F: arch/*/include/asm/spinlock*.h +F: include/linux/rwlock*.h +F: include/linux/mutex*.h +F: arch/*/include/asm/mutex*.h +F: include/linux/rwsem*.h +F: arch/*/include/asm/rwsem.h +F: include/linux/seqlock.h +F: lib/locking*.[ch] F: kernel/locking/ LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks) diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h index 0131a7058778..77873d0ad293 100644 --- a/arch/alpha/include/asm/rwsem.h +++ b/arch/alpha/include/asm/rwsem.h @@ -25,8 +25,8 @@ static inline void __down_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -52,13 +52,13 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) { long old, new, res; - res = sem->count; + res = atomic_long_read(&sem->count); do { new = res + RWSEM_ACTIVE_READ_BIAS; if (new <= 0) break; old = res; - res = cmpxchg(&sem->count, old, new); + res = atomic_long_cmpxchg(&sem->count, old, new); } while (res != old); return res >= 0 ? 1 : 0; } @@ -67,8 +67,8 @@ static inline long ___down_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count += RWSEM_ACTIVE_WRITE_BIAS; + oldcount = sem->count.counter; + sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS; #else long temp; __asm__ __volatile__( @@ -106,7 +106,7 @@ static inline int __down_write_killable(struct rw_semaphore *sem) */ static inline int __down_write_trylock(struct rw_semaphore *sem) { - long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); if (ret == RWSEM_UNLOCKED_VALUE) return 1; @@ -117,8 +117,8 @@ static inline void __up_read(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_ACTIVE_READ_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_ACTIVE_READ_BIAS; #else long temp; __asm__ __volatile__( @@ -142,8 +142,8 @@ static inline void __up_write(struct rw_semaphore *sem) { long count; #ifndef CONFIG_SMP - sem->count -= RWSEM_ACTIVE_WRITE_BIAS; - count = sem->count; + sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS; + count = sem->count.counter; #else long temp; __asm__ __volatile__( @@ -171,8 +171,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem) { long oldcount; #ifndef CONFIG_SMP - oldcount = sem->count; - sem->count -= RWSEM_WAITING_BIAS; + oldcount = sem->count.counter; + sem->count.counter -= RWSEM_WAITING_BIAS; #else long temp; __asm__ __volatile__( @@ -191,47 +191,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; -#else - long temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%2,%0\n" - " stq_c %0,%1\n" - " beq %0,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (temp), "=m" (sem->count) - :"Ir" (val), "m" (sem->count)); -#endif -} - -static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) -{ -#ifndef CONFIG_SMP - sem->count += val; - return sem->count; -#else - long ret, temp; - __asm__ __volatile__( - "1: ldq_l %0,%1\n" - " addq %0,%3,%2\n" - " addq %0,%3,%0\n" - " stq_c %2,%1\n" - " beq %2,2f\n" - ".subsection 2\n" - "2: br 1b\n" - ".previous" - :"=&r" (ret), "=m" (sem->count), "=&r" (temp) - :"Ir" (val), "m" (sem->count)); - - return ret; -#endif -} - #endif /* __KERNEL__ */ #endif /* _ALPHA_RWSEM_H */ diff --git a/arch/ia64/include/asm/mutex.h b/arch/ia64/include/asm/mutex.h index f41e66d65e31..28cb819e0ff9 100644 --- a/arch/ia64/include/asm/mutex.h +++ b/arch/ia64/include/asm/mutex.h @@ -82,7 +82,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (cmpxchg_acq(count, 1, 0) == 1) + if (atomic_read(count) == 1 && cmpxchg_acq(count, 1, 0) == 1) return 1; return 0; } diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h index 8b23e070b844..8fa98dd303b4 100644 --- a/arch/ia64/include/asm/rwsem.h +++ b/arch/ia64/include/asm/rwsem.h @@ -40,7 +40,7 @@ static inline void __down_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_acq((unsigned long *)&sem->count, 1); + long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1); if (result < 0) rwsem_down_read_failed(sem); @@ -55,9 +55,9 @@ ___down_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old + RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_acq(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old); return old; } @@ -85,7 +85,7 @@ __down_write_killable (struct rw_semaphore *sem) static inline void __up_read (struct rw_semaphore *sem) { - long result = ia64_fetchadd8_rel((unsigned long *)&sem->count, -1); + long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1); if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -100,9 +100,9 @@ __up_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_ACTIVE_WRITE_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0) rwsem_wake(sem); @@ -115,8 +115,8 @@ static inline int __down_read_trylock (struct rw_semaphore *sem) { long tmp; - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg_acq(&sem->count, tmp, tmp+1)) { + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) { return 1; } } @@ -129,8 +129,8 @@ __down_read_trylock (struct rw_semaphore *sem) static inline int __down_write_trylock (struct rw_semaphore *sem) { - long tmp = cmpxchg_acq(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); + long tmp = atomic_long_cmpxchg_acquire(&sem->count, + RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); return tmp == RWSEM_UNLOCKED_VALUE; } @@ -143,19 +143,12 @@ __downgrade_write (struct rw_semaphore *sem) long old, new; do { - old = sem->count; + old = atomic_long_read(&sem->count); new = old - RWSEM_WAITING_BIAS; - } while (cmpxchg_rel(&sem->count, old, new) != old); + } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); if (old < 0) rwsem_downgrade_wake(sem); } -/* - * Implement atomic add functionality. These used to be "inline" functions, but GCC v3.1 - * doesn't quite optimize this stuff right and ends up with bad calls to fetchandadd. - */ -#define rwsem_atomic_add(delta, sem) atomic64_add(delta, (atomic64_t *)(&(sem)->count)) -#define rwsem_atomic_update(delta, sem) atomic64_add_return(delta, (atomic64_t *)(&(sem)->count)) - #endif /* _ASM_IA64_RWSEM_H */ diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h index 127ab23e1f6c..078155fa1189 100644 --- a/arch/powerpc/include/asm/mutex.h +++ b/arch/powerpc/include/asm/mutex.h @@ -124,7 +124,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1)) return 1; return 0; } diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index c75e4471e618..597e7e96b59e 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -207,41 +207,4 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - signed long old, new; - - asm volatile( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " agr %1,%4\n" - " csg %0,%1,%2\n" - " jl 0b" - : "=&d" (old), "=&d" (new), "=Q" (sem->count) - : "Q" (sem->count), "d" (delta) - : "cc", "memory"); - return new; -} - #endif /* _S390_RWSEM_H */ diff --git a/arch/x86/include/asm/mutex_32.h b/arch/x86/include/asm/mutex_32.h index 85e6cda45a02..e9355a84fc67 100644 --- a/arch/x86/include/asm/mutex_32.h +++ b/arch/x86/include/asm/mutex_32.h @@ -101,7 +101,7 @@ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { /* cmpxchg because it never induces a false contention state. */ - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; return 0; diff --git a/arch/x86/include/asm/mutex_64.h b/arch/x86/include/asm/mutex_64.h index 07537a44216e..d9850758464e 100644 --- a/arch/x86/include/asm/mutex_64.h +++ b/arch/x86/include/asm/mutex_64.h @@ -118,10 +118,10 @@ do { \ static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(atomic_cmpxchg(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg(count, 1, 0) == 1)) return 1; - else - return 0; + + return 0; } #endif /* _ASM_X86_MUTEX_64_H */ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 453744c1d347..089ced4edbbc 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -213,23 +213,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) : "memory", "cc"); } -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" - : "+m" (sem->count) - : "er" (delta)); -} - -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - return delta + xadd(&sem->count, delta); -} - #endif /* __KERNEL__ */ #endif /* _ASM_X86_RWSEM_H */ diff --git a/include/asm-generic/mutex-dec.h b/include/asm-generic/mutex-dec.h index fd694cfd678a..c54829d3de37 100644 --- a/include/asm-generic/mutex-dec.h +++ b/include/asm-generic/mutex-dec.h @@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1)) + if (likely(atomic_read(count) == 1 && atomic_cmpxchg_acquire(count, 1, 0) == 1)) return 1; return 0; } diff --git a/include/asm-generic/mutex-xchg.h b/include/asm-generic/mutex-xchg.h index a6b4a7bd6ac9..3269ec4e195f 100644 --- a/include/asm-generic/mutex-xchg.h +++ b/include/asm-generic/mutex-xchg.h @@ -91,8 +91,12 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) static inline int __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) { - int prev = atomic_xchg_acquire(count, 0); + int prev; + if (atomic_read(count) != 1) + return 0; + + prev = atomic_xchg_acquire(count, 0); if (unlikely(prev < 0)) { /* * The lock was marked contended so we must restore that diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 05f05f17a7c2..9f0681bf1e87 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -111,10 +111,9 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock) static __always_inline void queued_spin_unlock(struct qspinlock *lock) { /* - * smp_mb__before_atomic() in order to guarantee release semantics + * unlock() needs release semantics: */ - smp_mb__before_atomic(); - atomic_sub(_Q_LOCKED_VAL, &lock->val); + (void)atomic_sub_return_release(_Q_LOCKED_VAL, &lock->val); } #endif diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index 3fc94a046bf5..5be122e3d326 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -41,8 +41,8 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) { long tmp; - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg_acquire(&sem->count, tmp, + while ((tmp = atomic_long_read(&sem->count)) >= 0) { + if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp + RWSEM_ACTIVE_READ_BIAS)) { return 1; } @@ -79,7 +79,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) { long tmp; - tmp = cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, + tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); return tmp == RWSEM_UNLOCKED_VALUE; } @@ -107,14 +107,6 @@ static inline void __up_write(struct rw_semaphore *sem) } /* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) -{ - atomic_long_add(delta, (atomic_long_t *)&sem->count); -} - -/* * downgrade write lock to read lock */ static inline void __downgrade_write(struct rw_semaphore *sem) @@ -134,13 +126,5 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -/* - * implement exchange and add functionality - */ -static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) -{ - return atomic_long_add_return(delta, (atomic_long_t *)&sem->count); -} - #endif /* __KERNEL__ */ #endif /* _ASM_GENERIC_RWSEM_H */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 793c0829e3a3..06f27fd9d760 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -545,10 +545,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * Similar to rcu_dereference(), but for situations where the pointed-to * object's lifetime is managed by something other than RCU. That * "something other" might be reference counting or simple immortality. + * + * The seemingly unused void * variable is to validate @p is indeed a pointer + * type. All pointer types silently cast to void *. */ #define lockless_dereference(p) \ ({ \ typeof(p) _________p1 = READ_ONCE(p); \ + __maybe_unused const void * const _________p2 = _________p1; \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 84f542df7ff5..1c7eec09e5eb 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -136,14 +136,12 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref, * used as a pointer. If the compiler generates a separate fetch * when using it as a pointer, __PERCPU_REF_ATOMIC may be set in * between contaminating the pointer value, meaning that - * ACCESS_ONCE() is required when fetching it. - * - * Also, we need a data dependency barrier to be paired with - * smp_store_release() in __percpu_ref_switch_to_percpu(). - * - * Use lockless deref which contains both. + * READ_ONCE() is required when fetching it. */ - percpu_ptr = lockless_dereference(ref->percpu_count_ptr); + percpu_ptr = READ_ONCE(ref->percpu_count_ptr); + + /* paired with smp_store_release() in __percpu_ref_switch_to_percpu() */ + smp_read_barrier_depends(); /* * Theoretically, the following could test just ATOMIC; however, diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index d37fbb34d06f..dd1d14250340 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -23,10 +23,11 @@ struct rw_semaphore; #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK #include <linux/rwsem-spinlock.h> /* use a generic implementation */ +#define __RWSEM_INIT_COUNT(name) .count = RWSEM_UNLOCKED_VALUE #else /* All arch specific implementations share the same struct */ struct rw_semaphore { - long count; + atomic_long_t count; struct list_head wait_list; raw_spinlock_t wait_lock; #ifdef CONFIG_RWSEM_SPIN_ON_OWNER @@ -54,9 +55,10 @@ extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); /* In all implementations count != 0 means locked */ static inline int rwsem_is_locked(struct rw_semaphore *sem) { - return sem->count != 0; + return atomic_long_read(&sem->count) != 0; } +#define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) #endif /* Common initializer macros and functions */ @@ -74,7 +76,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) #endif #define __RWSEM_INITIALIZER(name) \ - { .count = RWSEM_UNLOCKED_VALUE, \ + { __RWSEM_INIT_COUNT(name), \ .wait_list = LIST_HEAD_INIT((name).wait_list), \ .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) \ __RWSEM_OPT_INIT(name) \ diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 81f1a7107c0e..589d763a49b3 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -46,6 +46,7 @@ #include <linux/gfp.h> #include <linux/kmemcheck.h> #include <linux/random.h> +#include <linux/jhash.h> #include <asm/sections.h> @@ -309,10 +310,14 @@ static struct hlist_head chainhash_table[CHAINHASH_SIZE]; * It's a 64-bit hash, because it's important for the keys to be * unique. */ -#define iterate_chain_key(key1, key2) \ - (((key1) << MAX_LOCKDEP_KEYS_BITS) ^ \ - ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \ - (key2)) +static inline u64 iterate_chain_key(u64 key, u32 idx) +{ + u32 k0 = key, k1 = key >> 32; + + __jhash_mix(idx, k0, k1); /* Macro that modifies arguments! */ + + return k0 | (u64)k1 << 32; +} void lockdep_off(void) { diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h index 0799fd3e4cfa..372e6530180d 100644 --- a/kernel/locking/mutex-debug.h +++ b/kernel/locking/mutex-debug.h @@ -29,12 +29,12 @@ extern void debug_mutex_init(struct mutex *lock, const char *name, static inline void mutex_set_owner(struct mutex *lock) { - lock->owner = current; + WRITE_ONCE(lock->owner, current); } static inline void mutex_clear_owner(struct mutex *lock) { - lock->owner = NULL; + WRITE_ONCE(lock->owner, NULL); } #define spin_lock_mutex(lock, flags) \ diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 5cda397607f2..12f96199441c 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -17,14 +17,20 @@ __list_del((waiter)->list.prev, (waiter)->list.next) #ifdef CONFIG_MUTEX_SPIN_ON_OWNER +/* + * The mutex owner can get read and written to locklessly. + * We should use WRITE_ONCE when writing the owner value to + * avoid store tearing, otherwise, a thread could potentially + * read a partially written and incomplete owner value. + */ static inline void mutex_set_owner(struct mutex *lock) { - lock->owner = current; + WRITE_ONCE(lock->owner, current); } static inline void mutex_clear_owner(struct mutex *lock) { - lock->owner = NULL; + WRITE_ONCE(lock->owner, NULL); } #else static inline void mutex_set_owner(struct mutex *lock) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index 5fc8c311b8fe..2f9153b183c9 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -90,7 +90,7 @@ static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]); * therefore increment the cpu number by one. */ -static inline u32 encode_tail(int cpu, int idx) +static inline __pure u32 encode_tail(int cpu, int idx) { u32 tail; @@ -103,7 +103,7 @@ static inline u32 encode_tail(int cpu, int idx) return tail; } -static inline struct mcs_spinlock *decode_tail(u32 tail) +static inline __pure struct mcs_spinlock *decode_tail(u32 tail) { int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; @@ -268,6 +268,63 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, #endif /* + * Various notes on spin_is_locked() and spin_unlock_wait(), which are + * 'interesting' functions: + * + * PROBLEM: some architectures have an interesting issue with atomic ACQUIRE + * operations in that the ACQUIRE applies to the LOAD _not_ the STORE (ARM64, + * PPC). Also qspinlock has a similar issue per construction, the setting of + * the locked byte can be unordered acquiring the lock proper. + * + * This gets to be 'interesting' in the following cases, where the /should/s + * end up false because of this issue. + * + * + * CASE 1: + * + * So the spin_is_locked() correctness issue comes from something like: + * + * CPU0 CPU1 + * + * global_lock(); local_lock(i) + * spin_lock(&G) spin_lock(&L[i]) + * for (i) if (!spin_is_locked(&G)) { + * spin_unlock_wait(&L[i]); smp_acquire__after_ctrl_dep(); + * return; + * } + * // deal with fail + * + * Where it is important CPU1 sees G locked or CPU0 sees L[i] locked such + * that there is exclusion between the two critical sections. + * + * The load from spin_is_locked(&G) /should/ be constrained by the ACQUIRE from + * spin_lock(&L[i]), and similarly the load(s) from spin_unlock_wait(&L[i]) + * /should/ be constrained by the ACQUIRE from spin_lock(&G). + * + * Similarly, later stuff is constrained by the ACQUIRE from CTRL+RMB. + * + * + * CASE 2: + * + * For spin_unlock_wait() there is a second correctness issue, namely: + * + * CPU0 CPU1 + * + * flag = set; + * smp_mb(); spin_lock(&l) + * spin_unlock_wait(&l); if (!flag) + * // add to lockless list + * spin_unlock(&l); + * // iterate lockless list + * + * Which wants to ensure that CPU1 will stop adding bits to the list and CPU0 + * will observe the last entry on the list (if spin_unlock_wait() had ACQUIRE + * semantics etc..) + * + * Where flag /should/ be ordered against the locked store of l. + */ + +/* * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before * issuing an _unordered_ store to set _Q_LOCKED_VAL. * @@ -455,6 +512,8 @@ queue: * pending stuff. * * p,*,* -> n,*,* + * + * RELEASE, such that the stores to @node must be complete. */ old = xchg_tail(lock, tail); next = NULL; @@ -465,6 +524,15 @@ queue: */ if (old & _Q_TAIL_MASK) { prev = decode_tail(old); + /* + * The above xchg_tail() is also a load of @lock which generates, + * through decode_tail(), a pointer. + * + * The address dependency matches the RELEASE of xchg_tail() + * such that the access to @prev must happen after. + */ + smp_read_barrier_depends(); + WRITE_ONCE(prev->next, node); pv_wait_node(node, prev); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 3e746607abe5..1ec0f48962b3 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1478,7 +1478,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); */ int __sched rt_mutex_trylock(struct rt_mutex *lock) { - if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq())) + if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq())) return 0; return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock); diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 09e30c6225e5..2031281bb940 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -80,7 +80,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, debug_check_no_locks_freed((void *)sem, sizeof(*sem)); lockdep_init_map(&sem->dep_map, name, key, 0); #endif - sem->count = RWSEM_UNLOCKED_VALUE; + atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE); raw_spin_lock_init(&sem->wait_lock); INIT_LIST_HEAD(&sem->wait_list); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER @@ -114,12 +114,16 @@ enum rwsem_wake_type { * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed) * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so) * - there must be someone on the queue - * - the spinlock must be held by the caller + * - the wait_lock must be held by the caller + * - tasks are marked for wakeup, the caller must later invoke wake_up_q() + * to actually wakeup the blocked task(s) and drop the reference count, + * preferably when the wait_lock is released * - woken process blocks are discarded from the list after having task zeroed - * - writers are only woken if downgrading is false + * - writers are only marked woken if downgrading is false */ static struct rw_semaphore * -__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) +__rwsem_mark_wake(struct rw_semaphore *sem, + enum rwsem_wake_type wake_type, struct wake_q_head *wake_q) { struct rwsem_waiter *waiter; struct task_struct *tsk; @@ -128,13 +132,16 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); if (waiter->type == RWSEM_WAITING_FOR_WRITE) { - if (wake_type == RWSEM_WAKE_ANY) - /* Wake writer at the front of the queue, but do not - * grant it the lock yet as we want other writers - * to be able to steal it. Readers, on the other hand, - * will block as they will notice the queued writer. + if (wake_type == RWSEM_WAKE_ANY) { + /* + * Mark writer at the front of the queue for wakeup. + * Until the task is actually later awoken later by + * the caller, other writers are able to steal it. + * Readers, on the other hand, will block as they + * will notice the queued writer. */ - wake_up_process(waiter->task); + wake_q_add(wake_q, waiter->task); + } goto out; } @@ -146,15 +153,27 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) if (wake_type != RWSEM_WAKE_READ_OWNED) { adjustment = RWSEM_ACTIVE_READ_BIAS; try_reader_grant: - oldcount = rwsem_atomic_update(adjustment, sem) - adjustment; + oldcount = atomic_long_add_return(adjustment, &sem->count) - adjustment; + if (unlikely(oldcount < RWSEM_WAITING_BIAS)) { - /* A writer stole the lock. Undo our reader grant. */ - if (rwsem_atomic_update(-adjustment, sem) & - RWSEM_ACTIVE_MASK) + /* + * If the count is still less than RWSEM_WAITING_BIAS + * after removing the adjustment, it is assumed that + * a writer has stolen the lock. We have to undo our + * reader grant. + */ + if (atomic_long_add_return(-adjustment, &sem->count) < + RWSEM_WAITING_BIAS) goto out; /* Last active locker left. Retry waking readers. */ goto try_reader_grant; } + /* + * It is not really necessary to set it to reader-owned here, + * but it gives the spinners an early indication that the + * readers now have the lock. + */ + rwsem_set_reader_owned(sem); } /* Grant an infinite number of read locks to the readers at the front @@ -179,7 +198,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) adjustment -= RWSEM_WAITING_BIAS; if (adjustment) - rwsem_atomic_add(adjustment, sem); + atomic_long_add(adjustment, &sem->count); next = sem->wait_list.next; loop = woken; @@ -187,17 +206,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) waiter = list_entry(next, struct rwsem_waiter, list); next = waiter->list.next; tsk = waiter->task; + + wake_q_add(wake_q, tsk); /* - * Make sure we do not wakeup the next reader before - * setting the nil condition to grant the next reader; - * otherwise we could miss the wakeup on the other - * side and end up sleeping again. See the pairing - * in rwsem_down_read_failed(). + * Ensure that the last operation is setting the reader + * waiter to nil such that rwsem_down_read_failed() cannot + * race with do_exit() by always holding a reference count + * to the task to wakeup. */ - smp_mb(); - waiter->task = NULL; - wake_up_process(tsk); - put_task_struct(tsk); + smp_store_release(&waiter->task, NULL); } while (--loop); sem->wait_list.next = next; @@ -216,11 +233,11 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; struct rwsem_waiter waiter; struct task_struct *tsk = current; + WAKE_Q(wake_q); /* set up my own style of waitqueue */ waiter.task = tsk; waiter.type = RWSEM_WAITING_FOR_READ; - get_task_struct(tsk); raw_spin_lock_irq(&sem->wait_lock); if (list_empty(&sem->wait_list)) @@ -228,7 +245,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) list_add_tail(&waiter.list, &sem->wait_list); /* we're now waiting on the lock, but no longer actively locking */ - count = rwsem_atomic_update(adjustment, sem); + count = atomic_long_add_return(adjustment, &sem->count); /* If there are no active locks, wake the front queued process(es). * @@ -238,9 +255,10 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) if (count == RWSEM_WAITING_BIAS || (count > RWSEM_WAITING_BIAS && adjustment != -RWSEM_ACTIVE_READ_BIAS)) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); + sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irq(&sem->wait_lock); + wake_up_q(&wake_q); /* wait to be given the lock */ while (true) { @@ -255,17 +273,29 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) } EXPORT_SYMBOL(rwsem_down_read_failed); +/* + * This function must be called with the sem->wait_lock held to prevent + * race conditions between checking the rwsem wait list and setting the + * sem->count accordingly. + */ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) { /* - * Try acquiring the write lock. Check count first in order - * to reduce unnecessary expensive cmpxchg() operations. + * Avoid trying to acquire write lock if count isn't RWSEM_WAITING_BIAS. */ - if (count == RWSEM_WAITING_BIAS && - cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, - RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) { - if (!list_is_singular(&sem->wait_list)) - rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); + if (count != RWSEM_WAITING_BIAS) + return false; + + /* + * Acquire the lock by trying to set it to ACTIVE_WRITE_BIAS. If there + * are other tasks on the wait list, we need to add on WAITING_BIAS. + */ + count = list_is_singular(&sem->wait_list) ? + RWSEM_ACTIVE_WRITE_BIAS : + RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS; + + if (atomic_long_cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count) + == RWSEM_WAITING_BIAS) { rwsem_set_owner(sem); return true; } @@ -279,13 +309,13 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) */ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) { - long old, count = READ_ONCE(sem->count); + long old, count = atomic_long_read(&sem->count); while (true) { if (!(count == 0 || count == RWSEM_WAITING_BIAS)) return false; - old = cmpxchg_acquire(&sem->count, count, + old = atomic_long_cmpxchg_acquire(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS); if (old == count) { rwsem_set_owner(sem); @@ -306,16 +336,11 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) rcu_read_lock(); owner = READ_ONCE(sem->owner); - if (!owner) { - long count = READ_ONCE(sem->count); + if (!rwsem_owner_is_writer(owner)) { /* - * If sem->owner is not set, yet we have just recently entered the - * slowpath with the lock being active, then there is a possibility - * reader(s) may have the lock. To be safe, bail spinning in these - * situations. + * Don't spin if the rwsem is readers owned. */ - if (count & RWSEM_ACTIVE_MASK) - ret = false; + ret = !rwsem_owner_is_reader(owner); goto done; } @@ -325,10 +350,15 @@ done: return ret; } -static noinline -bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) +/* + * Return true only if we can still spin on the owner field of the rwsem. + */ +static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem) { - long count; + struct task_struct *owner = READ_ONCE(sem->owner); + + if (!rwsem_owner_is_writer(owner)) + goto out; rcu_read_lock(); while (sem->owner == owner) { @@ -349,22 +379,16 @@ bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner) cpu_relax_lowlatency(); } rcu_read_unlock(); - - if (READ_ONCE(sem->owner)) - return true; /* new owner, continue spinning */ - +out: /* - * When the owner is not set, the lock could be free or - * held by readers. Check the counter to verify the - * state. + * If there is a new owner or the owner is not set, we continue + * spinning. */ - count = READ_ONCE(sem->count); - return (count == 0 || count == RWSEM_WAITING_BIAS); + return !rwsem_owner_is_reader(READ_ONCE(sem->owner)); } static bool rwsem_optimistic_spin(struct rw_semaphore *sem) { - struct task_struct *owner; bool taken = false; preempt_disable(); @@ -376,12 +400,17 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) if (!osq_lock(&sem->osq)) goto done; - while (true) { - owner = READ_ONCE(sem->owner); - if (owner && !rwsem_spin_on_owner(sem, owner)) - break; - - /* wait_lock will be acquired if write_lock is obtained */ + /* + * Optimistically spin on the owner field and attempt to acquire the + * lock whenever the owner changes. Spinning will be stopped when: + * 1) the owning writer isn't running; or + * 2) readers own the lock as we can't determine if they are + * actively running or not. + */ + while (rwsem_spin_on_owner(sem)) { + /* + * Try to acquire the lock + */ if (rwsem_try_write_lock_unqueued(sem)) { taken = true; break; @@ -393,7 +422,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) * we're an RT task that will live-lock because we won't let * the owner complete. */ - if (!owner && (need_resched() || rt_task(current))) + if (!sem->owner && (need_resched() || rt_task(current))) break; /* @@ -440,9 +469,10 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) bool waiting = true; /* any queued threads before us */ struct rwsem_waiter waiter; struct rw_semaphore *ret = sem; + WAKE_Q(wake_q); /* undo write bias from down_write operation, stop active locking */ - count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem); + count = atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS, &sem->count); /* do optimistic spinning and steal lock if possible */ if (rwsem_optimistic_spin(sem)) @@ -465,18 +495,29 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) /* we're now waiting on the lock, but no longer actively locking */ if (waiting) { - count = READ_ONCE(sem->count); + count = atomic_long_read(&sem->count); /* * If there were already threads queued before us and there are * no active writers, the lock must be read owned; so we try to * wake any read locks that were queued ahead of us. */ - if (count > RWSEM_WAITING_BIAS) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS); + if (count > RWSEM_WAITING_BIAS) { + WAKE_Q(wake_q); + + sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q); + /* + * The wakeup is normally called _after_ the wait_lock + * is released, but given that we are proactively waking + * readers we can deal with the wake_q overhead as it is + * similar to releasing and taking the wait_lock again + * for attempting rwsem_try_write_lock(). + */ + wake_up_q(&wake_q); + } } else - count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); + count = atomic_long_add_return(RWSEM_WAITING_BIAS, &sem->count); /* wait until we successfully acquire the lock */ set_current_state(state); @@ -492,7 +533,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) schedule(); set_current_state(state); - } while ((count = sem->count) & RWSEM_ACTIVE_MASK); + } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK); raw_spin_lock_irq(&sem->wait_lock); } @@ -507,10 +548,11 @@ out_nolock: raw_spin_lock_irq(&sem->wait_lock); list_del(&waiter.list); if (list_empty(&sem->wait_list)) - rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem); + atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); else - __rwsem_do_wake(sem, RWSEM_WAKE_ANY); + __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irq(&sem->wait_lock); + wake_up_q(&wake_q); return ERR_PTR(-EINTR); } @@ -537,6 +579,7 @@ __visible struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) { unsigned long flags; + WAKE_Q(wake_q); /* * If a spinner is present, it is not necessary to do the wakeup. @@ -573,9 +616,10 @@ locked: /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY); + sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + wake_up_q(&wake_q); return sem; } @@ -590,14 +634,16 @@ __visible struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) { unsigned long flags; + WAKE_Q(wake_q); raw_spin_lock_irqsave(&sem->wait_lock, flags); /* do nothing if list empty */ if (!list_empty(&sem->wait_list)) - sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED); + sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + wake_up_q(&wake_q); return sem; } diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 2e853ad93a3a..45ba475d4be3 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -22,6 +22,7 @@ void __sched down_read(struct rw_semaphore *sem) rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_read_trylock, __down_read); + rwsem_set_reader_owned(sem); } EXPORT_SYMBOL(down_read); @@ -33,8 +34,10 @@ int down_read_trylock(struct rw_semaphore *sem) { int ret = __down_read_trylock(sem); - if (ret == 1) + if (ret == 1) { rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); + rwsem_set_reader_owned(sem); + } return ret; } @@ -124,7 +127,7 @@ void downgrade_write(struct rw_semaphore *sem) * lockdep: a downgraded write will live on as a write * dependency. */ - rwsem_clear_owner(sem); + rwsem_set_reader_owned(sem); __downgrade_write(sem); } @@ -138,6 +141,7 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); LOCK_CONTENDED(sem, __down_read_trylock, __down_read); + rwsem_set_reader_owned(sem); } EXPORT_SYMBOL(down_read_nested); diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index 870ed9a5b426..a699f4048ba1 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -1,14 +1,58 @@ +/* + * The owner field of the rw_semaphore structure will be set to + * RWSEM_READ_OWNED when a reader grabs the lock. A writer will clear + * the owner field when it unlocks. A reader, on the other hand, will + * not touch the owner field when it unlocks. + * + * In essence, the owner field now has the following 3 states: + * 1) 0 + * - lock is free or the owner hasn't set the field yet + * 2) RWSEM_READER_OWNED + * - lock is currently or previously owned by readers (lock is free + * or not set by owner yet) + * 3) Other non-zero value + * - a writer owns the lock + */ +#define RWSEM_READER_OWNED ((struct task_struct *)1UL) + #ifdef CONFIG_RWSEM_SPIN_ON_OWNER +/* + * All writes to owner are protected by WRITE_ONCE() to make sure that + * store tearing can't happen as optimistic spinners may read and use + * the owner value concurrently without lock. Read from owner, however, + * may not need READ_ONCE() as long as the pointer value is only used + * for comparison and isn't being dereferenced. + */ static inline void rwsem_set_owner(struct rw_semaphore *sem) { - sem->owner = current; + WRITE_ONCE(sem->owner, current); } static inline void rwsem_clear_owner(struct rw_semaphore *sem) { - sem->owner = NULL; + WRITE_ONCE(sem->owner, NULL); +} + +static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) +{ + /* + * We check the owner value first to make sure that we will only + * do a write to the rwsem cacheline when it is really necessary + * to minimize cacheline contention. + */ + if (sem->owner != RWSEM_READER_OWNED) + WRITE_ONCE(sem->owner, RWSEM_READER_OWNED); +} + +static inline bool rwsem_owner_is_writer(struct task_struct *owner) +{ + return owner && owner != RWSEM_READER_OWNED; } +static inline bool rwsem_owner_is_reader(struct task_struct *owner) +{ + return owner == RWSEM_READER_OWNED; +} #else static inline void rwsem_set_owner(struct rw_semaphore *sem) { @@ -17,4 +61,8 @@ static inline void rwsem_set_owner(struct rw_semaphore *sem) static inline void rwsem_clear_owner(struct rw_semaphore *sem) { } + +static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) +{ +} #endif diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c7f1bc4f817c..b7326893221f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3681,7 +3681,6 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) && (rnp == rnp_root || ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) && - !mutex_is_locked(&rsp->exp_mutex) && mutex_trylock(&rsp->exp_mutex)) goto fastpath; |