From 2f1f043e7bea3fbf4c1869df2f7a0312bc8ca2bf Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 12 Jan 2023 22:59:53 -0800 Subject: locking/lockdep: Introduce lock_sync() Currently, functions like synchronize_srcu() do not have lockdep annotations resembling those of other write-side locking primitives. Such annotations might look as follows: lock_acquire(); lock_release(); Such annotations would tell lockdep that synchronize_srcu() acts like an empty critical section that waits for other (read-side) critical sections to finish. This would definitely catch some deadlock, but as pointed out by Paul Mckenney [1], this could also introduce false positives because of irq-safe/unsafe detection. Of course, there are tricks could help with this: might_sleep(); // Existing statement in __synchronize_srcu(). if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { local_irq_disable(); lock_acquire(); lock_release(); local_irq_enable(); } But it would be better for lockdep to provide a separate annonation for functions like synchronize_srcu(), so that people won't need to repeat the ugly tricks above. Therefore introduce lock_sync(), which is simply an lock+unlock pair with no irq safe/unsafe deadlock check. This works because the to-be-annontated functions do not create real critical sections, and there is therefore no way that irq can create extra dependencies. [1]: https://lore.kernel.org/lkml/20180412021233.ewncg5jjuzjw3x62@tardis/ Signed-off-by: Boqun Feng Acked-by: Waiman Long Signed-off-by: Paul E. McKenney [ boqun: Fix typos reported by Davidlohr Bueso and Paul E. Mckenney ] Acked-by: Peter Zijlstra (Intel) Signed-off-by: Boqun Feng --- kernel/locking/lockdep.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'kernel/locking/lockdep.c') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 50d4863974e7..3ee3b278789d 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5693,6 +5693,40 @@ void lock_release(struct lockdep_map *lock, unsigned long ip) } EXPORT_SYMBOL_GPL(lock_release); +/* + * lock_sync() - A special annotation for synchronize_{s,}rcu()-like API. + * + * No actual critical section is created by the APIs annotated with this: these + * APIs are used to wait for one or multiple critical sections (on other CPUs + * or threads), and it means that calling these APIs inside these critical + * sections is potential deadlock. + * + * This annotation acts as an acquire+release annotation pair with hardirqoff + * being 1. Since there's no critical section, no interrupt can create extra + * dependencies "inside" the annotation, hardirqoff == 1 allows us to avoid + * false positives. + */ +void lock_sync(struct lockdep_map *lock, unsigned subclass, int read, + int check, struct lockdep_map *nest_lock, unsigned long ip) +{ + unsigned long flags; + + if (unlikely(!lockdep_enabled())) + return; + + raw_local_irq_save(flags); + check_flags(flags); + + lockdep_recursion_inc(); + __lock_acquire(lock, subclass, 0, read, check, 1, nest_lock, ip, 0, 0); + + if (__lock_release(lock, ip)) + check_chain_key(current); + lockdep_recursion_finish(); + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_sync); + noinstr int lock_is_held_type(const struct lockdep_map *lock, int read) { unsigned long flags; -- cgit From 0471db447cb7de56bbe2fedd9256b4d2b8ef642a Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 13 Jan 2023 15:57:22 -0800 Subject: locking/lockdep: Improve the deadlock scenario print for sync and read lock Lock scenario print is always a weak spot of lockdep splats. Improvement can be made if we rework the dependency search and the error printing. However without touching the graph search, we can improve a little for the circular deadlock case, since we have the to-be-added lock dependency, and know whether these two locks are read/write/sync. In order to know whether a held_lock is sync or not, a bit was "stolen" from ->references, which reduce our limit for the same lock class nesting from 2^12 to 2^11, and it should still be good enough. Besides, since we now have bit in held_lock for sync, we don't need the "hardirqoffs being 1" trick, and also we can avoid the __lock_release() if we jump out of __lock_acquire() before the held_lock stored. With these changes, a deadlock case evolved with read lock and sync gets a better print-out from: [...] Possible unsafe locking scenario: [...] [...] CPU0 CPU1 [...] ---- ---- [...] lock(srcuA); [...] lock(srcuB); [...] lock(srcuA); [...] lock(srcuB); to [...] Possible unsafe locking scenario: [...] [...] CPU0 CPU1 [...] ---- ---- [...] rlock(srcuA); [...] lock(srcuB); [...] lock(srcuA); [...] sync(srcuB); Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney Acked-by: Peter Zijlstra (Intel) Signed-off-by: Boqun Feng --- include/linux/lockdep.h | 3 ++- kernel/locking/lockdep.c | 48 ++++++++++++++++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 17 deletions(-) (limited to 'kernel/locking/lockdep.c') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 14d9dbedc6c1..b32256e9e944 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -134,7 +134,8 @@ struct held_lock { unsigned int read:2; /* see lock_acquire() comment */ unsigned int check:1; /* see lock_acquire() comment */ unsigned int hardirqs_off:1; - unsigned int references:12; /* 32 bits */ + unsigned int sync:1; + unsigned int references:11; /* 32 bits */ unsigned int pin_count; }; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 3ee3b278789d..dcd1d5bfc1e0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1881,6 +1881,8 @@ print_circular_lock_scenario(struct held_lock *src, struct lock_class *source = hlock_class(src); struct lock_class *target = hlock_class(tgt); struct lock_class *parent = prt->class; + int src_read = src->read; + int tgt_read = tgt->read; /* * A direct locking problem where unsafe_class lock is taken @@ -1908,7 +1910,10 @@ print_circular_lock_scenario(struct held_lock *src, printk(" Possible unsafe locking scenario:\n\n"); printk(" CPU0 CPU1\n"); printk(" ---- ----\n"); - printk(" lock("); + if (tgt_read != 0) + printk(" rlock("); + else + printk(" lock("); __print_lock_name(target); printk(KERN_CONT ");\n"); printk(" lock("); @@ -1917,7 +1922,12 @@ print_circular_lock_scenario(struct held_lock *src, printk(" lock("); __print_lock_name(target); printk(KERN_CONT ");\n"); - printk(" lock("); + if (src_read != 0) + printk(" rlock("); + else if (src->sync) + printk(" sync("); + else + printk(" lock("); __print_lock_name(source); printk(KERN_CONT ");\n"); printk("\n *** DEADLOCK ***\n\n"); @@ -4531,7 +4541,13 @@ mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) return 0; } } - if (!hlock->hardirqs_off) { + + /* + * For lock_sync(), don't mark the ENABLED usage, since lock_sync() + * creates no critical section and no extra dependency can be introduced + * by interrupts + */ + if (!hlock->hardirqs_off && !hlock->sync) { if (hlock->read) { if (!mark_lock(curr, hlock, LOCK_ENABLED_HARDIRQ_READ)) @@ -4910,7 +4926,7 @@ static int __lock_is_held(const struct lockdep_map *lock, int read); static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, int hardirqs_off, struct lockdep_map *nest_lock, unsigned long ip, - int references, int pin_count) + int references, int pin_count, int sync) { struct task_struct *curr = current; struct lock_class *class = NULL; @@ -4961,7 +4977,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, class_idx = class - lock_classes; - if (depth) { /* we're holding locks */ + if (depth && !sync) { + /* we're holding locks and the new held lock is not a sync */ hlock = curr->held_locks + depth - 1; if (hlock->class_idx == class_idx && nest_lock) { if (!references) @@ -4995,6 +5012,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->trylock = trylock; hlock->read = read; hlock->check = check; + hlock->sync = !!sync; hlock->hardirqs_off = !!hardirqs_off; hlock->references = references; #ifdef CONFIG_LOCK_STAT @@ -5056,6 +5074,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (!validate_chain(curr, hlock, chain_head, chain_key)) return 0; + /* For lock_sync(), we are done here since no actual critical section */ + if (hlock->sync) + return 1; + curr->curr_chain_key = chain_key; curr->lockdep_depth++; check_chain_key(curr); @@ -5197,7 +5219,7 @@ static int reacquire_held_locks(struct task_struct *curr, unsigned int depth, hlock->read, hlock->check, hlock->hardirqs_off, hlock->nest_lock, hlock->acquire_ip, - hlock->references, hlock->pin_count)) { + hlock->references, hlock->pin_count, 0)) { case 0: return 1; case 1: @@ -5667,7 +5689,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, lockdep_recursion_inc(); __lock_acquire(lock, subclass, trylock, read, check, - irqs_disabled_flags(flags), nest_lock, ip, 0, 0); + irqs_disabled_flags(flags), nest_lock, ip, 0, 0, 0); lockdep_recursion_finish(); raw_local_irq_restore(flags); } @@ -5700,11 +5722,6 @@ EXPORT_SYMBOL_GPL(lock_release); * APIs are used to wait for one or multiple critical sections (on other CPUs * or threads), and it means that calling these APIs inside these critical * sections is potential deadlock. - * - * This annotation acts as an acquire+release annotation pair with hardirqoff - * being 1. Since there's no critical section, no interrupt can create extra - * dependencies "inside" the annotation, hardirqoff == 1 allows us to avoid - * false positives. */ void lock_sync(struct lockdep_map *lock, unsigned subclass, int read, int check, struct lockdep_map *nest_lock, unsigned long ip) @@ -5718,10 +5735,9 @@ void lock_sync(struct lockdep_map *lock, unsigned subclass, int read, check_flags(flags); lockdep_recursion_inc(); - __lock_acquire(lock, subclass, 0, read, check, 1, nest_lock, ip, 0, 0); - - if (__lock_release(lock, ip)) - check_chain_key(current); + __lock_acquire(lock, subclass, 0, read, check, + irqs_disabled_flags(flags), nest_lock, ip, 0, 0, 1); + check_chain_key(current); lockdep_recursion_finish(); raw_local_irq_restore(flags); } -- cgit From 0cce06ba859a515bd06224085d3addb870608b6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 25 Apr 2023 17:03:13 +0200 Subject: debugobjects,locking: Annotate debug_object_fill_pool() wait type violation There is an explicit wait-type violation in debug_object_fill_pool() for PREEMPT_RT=n kernels which allows them to more easily fill the object pool and reduce the chance of allocation failures. Lockdep's wait-type checks are designed to check the PREEMPT_RT locking rules even for PREEMPT_RT=n kernels and object to this, so create a lockdep annotation to allow this to stand. Specifically, create a 'lock' type that overrides the inner wait-type while it is held -- allowing one to temporarily raise it, such that the violation is hidden. Reported-by: Vlastimil Babka Reported-by: Qi Zheng Signed-off-by: Peter Zijlstra (Intel) Tested-by: Qi Zheng Link: https://lkml.kernel.org/r/20230429100614.GA1489784@hirez.programming.kicks-ass.net --- include/linux/lockdep.h | 14 ++++++++++++++ include/linux/lockdep_types.h | 1 + kernel/locking/lockdep.c | 28 +++++++++++++++++++++------- lib/debugobjects.c | 15 +++++++++++++-- 4 files changed, 49 insertions(+), 9 deletions(-) (limited to 'kernel/locking/lockdep.c') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 1023f349af71..a3329fb49b33 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -339,6 +339,16 @@ extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); #define lockdep_repin_lock(l,c) lock_repin_lock(&(l)->dep_map, (c)) #define lockdep_unpin_lock(l,c) lock_unpin_lock(&(l)->dep_map, (c)) +/* + * Must use lock_map_aquire_try() with override maps to avoid + * lockdep thinking they participate in the block chain. + */ +#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \ + struct lockdep_map _name = { \ + .name = #_name "-wait-type-override", \ + .wait_type_inner = _wait_type, \ + .lock_type = LD_LOCK_WAIT_OVERRIDE, } + #else /* !CONFIG_LOCKDEP */ static inline void lockdep_init_task(struct task_struct *task) @@ -427,6 +437,9 @@ extern int lockdep_is_held(const void *); #define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0) #define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0) +#define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type) \ + struct lockdep_map __maybe_unused _name = {} + #endif /* !LOCKDEP */ enum xhlock_context_t { @@ -551,6 +564,7 @@ do { \ #define rwsem_release(l, i) lock_release(l, i) #define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) +#define lock_map_acquire_try(l) lock_acquire_exclusive(l, 0, 1, NULL, _THIS_IP_) #define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) #define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_) #define lock_map_release(l) lock_release(l, _THIS_IP_) diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index d22430840b53..59f4fb1626ea 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -33,6 +33,7 @@ enum lockdep_wait_type { enum lockdep_lock_type { LD_LOCK_NORMAL = 0, /* normal, catch all */ LD_LOCK_PERCPU, /* percpu */ + LD_LOCK_WAIT_OVERRIDE, /* annotation */ LD_LOCK_MAX, }; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 50d4863974e7..62ef295e07e6 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2253,6 +2253,9 @@ static inline bool usage_match(struct lock_list *entry, void *mask) static inline bool usage_skip(struct lock_list *entry, void *mask) { + if (entry->class->lock_type == LD_LOCK_NORMAL) + return false; + /* * Skip local_lock() for irq inversion detection. * @@ -2279,14 +2282,16 @@ static inline bool usage_skip(struct lock_list *entry, void *mask) * As a result, we will skip local_lock(), when we search for irq * inversion bugs. */ - if (entry->class->lock_type == LD_LOCK_PERCPU) { - if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG)) - return false; + if (entry->class->lock_type == LD_LOCK_PERCPU && + DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG)) + return false; - return true; - } + /* + * Skip WAIT_OVERRIDE for irq inversion detection -- it's not actually + * a lock and only used to override the wait_type. + */ - return false; + return true; } /* @@ -4752,7 +4757,8 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next) for (; depth < curr->lockdep_depth; depth++) { struct held_lock *prev = curr->held_locks + depth; - u8 prev_inner = hlock_class(prev)->wait_type_inner; + struct lock_class *class = hlock_class(prev); + u8 prev_inner = class->wait_type_inner; if (prev_inner) { /* @@ -4762,6 +4768,14 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next) * Also due to trylocks. */ curr_inner = min(curr_inner, prev_inner); + + /* + * Allow override for annotations -- this is typically + * only valid/needed for code that only exists when + * CONFIG_PREEMPT_RT=n. + */ + if (unlikely(class->lock_type == LD_LOCK_WAIT_OVERRIDE)) + curr_inner = prev_inner; } } diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 003edc5ebd67..826c617b10a7 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -591,10 +591,21 @@ static void debug_objects_fill_pool(void) { /* * On RT enabled kernels the pool refill must happen in preemptible - * context: + * context -- for !RT kernels we rely on the fact that spinlock_t and + * raw_spinlock_t are basically the same type and this lock-type + * inversion works just fine. */ - if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) + if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) { + /* + * Annotate away the spinlock_t inside raw_spinlock_t warning + * by temporarily raising the wait-type to WAIT_SLEEP, matching + * the preemptible() condition above. + */ + static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_SLEEP); + lock_map_acquire_try(&fill_pool_map); fill_pool(); + lock_map_release(&fill_pool_map); + } } static void -- cgit