From e4ab322fbaaaf84b23d6cb0e3317a7f68baf36dc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 17 Sep 2023 13:22:17 +0200 Subject: cleanup: Add conditional guard support Adds: - DEFINE_GUARD_COND() / DEFINE_LOCK_GUARD_1_COND() to extend existing guards with conditional lock primitives, eg. mutex_trylock(), mutex_lock_interruptible(). nb. both primitives allow NULL 'locks', which cause the lock to fail (obviously). - extends scoped_guard() to not take the body when the the conditional guard 'fails'. eg. scoped_guard (mutex_intr, &task->signal_cred_guard_mutex) { ... } will only execute the body when the mutex is held. - provides scoped_cond_guard(name, fail, args...); which extends scoped_guard() to do fail when the lock-acquire fails. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20231102110706.460851167%40infradead.org --- include/linux/cleanup.h | 52 +++++++++++++++++++++++++++++++++++++++++++++--- include/linux/mutex.h | 3 ++- include/linux/rwsem.h | 8 ++++---- include/linux/spinlock.h | 15 ++++++++++++++ 4 files changed, 70 insertions(+), 8 deletions(-) diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 9f1a9c455b68..c2d09bc4f976 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -125,25 +125,55 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * trivial wrapper around DEFINE_CLASS() above specifically * for locks. * + * DEFINE_GUARD_COND(name, ext, condlock) + * wrapper around EXTEND_CLASS above to add conditional lock + * variants to a base class, eg. mutex_trylock() or + * mutex_lock_interruptible(). + * * guard(name): - * an anonymous instance of the (guard) class + * an anonymous instance of the (guard) class, not recommended for + * conditional locks. * * scoped_guard (name, args...) { }: * similar to CLASS(name, scope)(args), except the variable (with the * explicit name 'scope') is declard in a for-loop such that its scope is * bound to the next (compound) statement. * + * for conditional locks the loop body is skipped when the lock is not + * acquired. + * + * scoped_cond_guard (name, fail, args...) { }: + * similar to scoped_guard(), except it does fail when the lock + * acquire fails. + * */ #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ - DEFINE_CLASS(_name, _type, _unlock, ({ _lock; _T; }), _type _T) + DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ + static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ + { return *_T; } + +#define DEFINE_GUARD_COND(_name, _ext, _condlock) \ + EXTEND_CLASS(_name, _ext, \ + ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ + class_##_name##_t _T) \ + static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ + { return class_##_name##_lock_ptr(_T); } #define guard(_name) \ CLASS(_name, __UNIQUE_ID(guard)) +#define __guard_ptr(_name) class_##_name##_lock_ptr + #define scoped_guard(_name, args...) \ for (CLASS(_name, scope)(args), \ - *done = NULL; !done; done = (void *)1) + *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1) + +#define scoped_cond_guard(_name, _fail, args...) \ + for (CLASS(_name, scope)(args), \ + *done = NULL; !done; done = (void *)1) \ + if (!__guard_ptr(_name)(&scope)) _fail; \ + else /* * Additional helper macros for generating lock guards with types, either for @@ -152,6 +182,7 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ * * DEFINE_LOCK_GUARD_0(name, lock, unlock, ...) * DEFINE_LOCK_GUARD_1(name, type, lock, unlock, ...) + * DEFINE_LOCK_GUARD_1_COND(name, ext, condlock) * * will result in the following type: * @@ -173,6 +204,11 @@ typedef struct { \ static inline void class_##_name##_destructor(class_##_name##_t *_T) \ { \ if (_T->lock) { _unlock; } \ +} \ + \ +static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ +{ \ + return _T->lock; \ } @@ -201,4 +237,14 @@ __DEFINE_LOCK_GUARD_1(_name, _type, _lock) __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ __DEFINE_LOCK_GUARD_0(_name, _lock) +#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ + EXTEND_CLASS(_name, _ext, \ + ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ + if (_T->lock && !(_condlock)) _T->lock = NULL; \ + _t; }), \ + typeof_member(class_##_name##_t, lock) l) \ + static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ + { return class_##_name##_lock_ptr(_T); } + + #endif /* __LINUX_GUARDS_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h index a33aa9eb9fc3..95d11308f995 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -221,6 +221,7 @@ extern void mutex_unlock(struct mutex *lock); extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) -DEFINE_FREE(mutex, struct mutex *, if (_T) mutex_unlock(_T)) +DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) +DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) #endif /* __LINUX_MUTEX_H */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 1dd530ce8b45..9c29689ff505 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -203,11 +203,11 @@ extern void up_read(struct rw_semaphore *sem); extern void up_write(struct rw_semaphore *sem); DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) -DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) - -DEFINE_FREE(up_read, struct rw_semaphore *, if (_T) up_read(_T)) -DEFINE_FREE(up_write, struct rw_semaphore *, if (_T) up_write(_T)) +DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) +DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) +DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) +DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) /* * downgrade write lock to read lock diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 31d3d747a9db..ceb56b39c70f 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -507,6 +507,8 @@ DEFINE_LOCK_GUARD_1(raw_spinlock, raw_spinlock_t, raw_spin_lock(_T->lock), raw_spin_unlock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock, _try, raw_spin_trylock(_T->lock)) + DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t, raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING), raw_spin_unlock(_T->lock)) @@ -515,23 +517,36 @@ DEFINE_LOCK_GUARD_1(raw_spinlock_irq, raw_spinlock_t, raw_spin_lock_irq(_T->lock), raw_spin_unlock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irq, _try, raw_spin_trylock_irq(_T->lock)) + DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, raw_spin_lock_irqsave(_T->lock, _T->flags), raw_spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irqsave, _try, + raw_spin_trylock_irqsave(_T->lock, _T->flags)) + DEFINE_LOCK_GUARD_1(spinlock, spinlock_t, spin_lock(_T->lock), spin_unlock(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(spinlock, _try, spin_trylock(_T->lock)) + DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, spin_lock_irq(_T->lock), spin_unlock_irq(_T->lock)) +DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try, + spin_trylock_irq(_T->lock)) + DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, spin_lock_irqsave(_T->lock, _T->flags), spin_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) +DEFINE_LOCK_GUARD_1_COND(spinlock_irqsave, _try, + spin_trylock_irqsave(_T->lock, _T->flags)) + #undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ -- cgit From 28a9466d75a861c26ba57b64a0a9a436a7c61e77 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Nov 2023 10:00:59 +0100 Subject: MAINTAINERS: Add include/linux/lockdep*.h Have lockdep_api.h and lockdep_types.h match as well. Signed-off-by: Christophe JAILLET Signed-off-by: Ingo Molnar Acked-by: Waiman Long Link: https://lore.kernel.org/r/e722abd043e5de64d2acd28d581e4a952994a94e.1700989248.git.christophe.jaillet@wanadoo.fr --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 97f51d5ec1cf..9834adcdd50a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12458,7 +12458,7 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core F: Documentation/locking/ F: arch/*/include/asm/spinlock*.h -F: include/linux/lockdep.h +F: include/linux/lockdep*.h F: include/linux/mutex*.h F: include/linux/rwlock*.h F: include/linux/rwsem*.h -- cgit From 18caaedaf4c3712ab6821f292598a8f86e6d7972 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 26 Nov 2023 09:56:29 +0100 Subject: locking/lockdep: Slightly reorder 'struct lock_class' to save some memory Based on pahole, 2 holes can be combined in the 'struct lock_class'. This saves 8 bytes in the structure on my x86_64. On a x86_64 configured with allmodconfig, this saves ~64kb of memory in 'kernel/locking/lockdep.o': text data bss dec filename Before: 102,501 1,912,490 11,531,636 13,546,627 kernel/locking/lockdep.o After: 102,181 1,912,490 11,466,100 13,480,771 kernel/locking/lockdep.o because of: struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; After the reorder, pahole gives: struct lock_class { struct hlist_node hash_entry; /* 0 16 */ struct list_head lock_entry; /* 16 16 */ struct list_head locks_after; /* 32 16 */ struct list_head locks_before; /* 48 16 */ /* --- cacheline 1 boundary (64 bytes) --- */ const struct lockdep_subclass_key * key; /* 64 8 */ lock_cmp_fn cmp_fn; /* 72 8 */ lock_print_fn print_fn; /* 80 8 */ unsigned int subclass; /* 88 4 */ unsigned int dep_gen_id; /* 92 4 */ long unsigned int usage_mask; /* 96 8 */ const struct lock_trace * usage_traces[10]; /* 104 80 */ /* --- cacheline 2 boundary (128 bytes) was 56 bytes ago --- */ const char * name; /* 184 8 */ /* --- cacheline 3 boundary (192 bytes) --- */ int name_version; /* 192 4 */ u8 wait_type_inner; /* 196 1 */ u8 wait_type_outer; /* 197 1 */ u8 lock_type; /* 198 1 */ /* XXX 1 byte hole, try to pack */ long unsigned int contention_point[4]; /* 200 32 */ long unsigned int contending_point[4]; /* 232 32 */ /* size: 264, cachelines: 5, members: 18 */ /* sum members: 263, holes: 1, sum holes: 1 */ /* last cacheline: 8 bytes */ }; Signed-off-by: Christophe JAILLET Signed-off-by: Ingo Molnar Acked-by: Waiman Long Link: https://lore.kernel.org/r/801258371fc4101f96495a5aaecef638d6cbd8d3.1700988869.git.christophe.jaillet@wanadoo.fr --- include/linux/lockdep_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 2ebc323d345a..857d785e89e6 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -127,12 +127,12 @@ struct lock_class { unsigned long usage_mask; const struct lock_trace *usage_traces[LOCK_TRACE_STATES]; + const char *name; /* * Generation counter, when doing certain classes of graph walking, * to ensure that we check one node only once: */ int name_version; - const char *name; u8 wait_type_inner; u8 wait_type_outer; -- cgit From 5431fdd2c181dd2eac218e45b44deb2925fa48f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 17 Sep 2023 13:24:21 +0200 Subject: ptrace: Convert ptrace_attach() to use lock guards Created as testing for the conditional guard infrastructure. Specifically this makes use of the following form: scoped_cond_guard (mutex_intr, return -ERESTARTNOINTR, &task->signal->cred_guard_mutex) { ... } ... return 0; Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Link: https://lkml.kernel.org/r/20231102110706.568467727%40infradead.org --- include/linux/sched/task.h | 2 + include/linux/spinlock.h | 26 +++++++++ kernel/ptrace.c | 128 +++++++++++++++++++++------------------------ 3 files changed, 89 insertions(+), 67 deletions(-) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index a23af225c898..4f3dca353556 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -226,4 +226,6 @@ static inline void task_unlock(struct task_struct *p) spin_unlock(&p->alloc_lock); } +DEFINE_GUARD(task_lock, struct task_struct *, task_lock(_T), task_unlock(_T)) + #endif /* _LINUX_SCHED_TASK_H */ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index ceb56b39c70f..90bc853cafb6 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -548,5 +548,31 @@ DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, DEFINE_LOCK_GUARD_1_COND(spinlock_irqsave, _try, spin_trylock_irqsave(_T->lock, _T->flags)) +DEFINE_LOCK_GUARD_1(read_lock, rwlock_t, + read_lock(_T->lock), + read_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(read_lock_irq, rwlock_t, + read_lock_irq(_T->lock), + read_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(read_lock_irqsave, rwlock_t, + read_lock_irqsave(_T->lock, _T->flags), + read_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + +DEFINE_LOCK_GUARD_1(write_lock, rwlock_t, + write_lock(_T->lock), + write_unlock(_T->lock)) + +DEFINE_LOCK_GUARD_1(write_lock_irq, rwlock_t, + write_lock_irq(_T->lock), + write_unlock_irq(_T->lock)) + +DEFINE_LOCK_GUARD_1(write_lock_irqsave, rwlock_t, + write_lock_irqsave(_T->lock, _T->flags), + write_unlock_irqrestore(_T->lock, _T->flags), + unsigned long flags) + #undef __LINUX_INSIDE_SPINLOCK_H #endif /* __LINUX_SPINLOCK_H */ diff --git a/kernel/ptrace.c b/kernel/ptrace.c index d8b5e13a2229..5c579fb9a5e3 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -386,6 +386,34 @@ static int check_ptrace_options(unsigned long data) return 0; } +static inline void ptrace_set_stopped(struct task_struct *task) +{ + guard(spinlock)(&task->sighand->siglock); + + /* + * If the task is already STOPPED, set JOBCTL_TRAP_STOP and + * TRAPPING, and kick it so that it transits to TRACED. TRAPPING + * will be cleared if the child completes the transition or any + * event which clears the group stop states happens. We'll wait + * for the transition to complete before returning from this + * function. + * + * This hides STOPPED -> RUNNING -> TRACED transition from the + * attaching thread but a different thread in the same group can + * still observe the transient RUNNING state. IOW, if another + * thread's WNOHANG wait(2) on the stopped tracee races against + * ATTACH, the wait(2) may fail due to the transient RUNNING. + * + * The following task_is_stopped() test is safe as both transitions + * in and out of STOPPED are protected by siglock. + */ + if (task_is_stopped(task) && + task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) { + task->jobctl &= ~JOBCTL_STOPPED; + signal_wake_up_state(task, __TASK_STOPPED); + } +} + static int ptrace_attach(struct task_struct *task, long request, unsigned long addr, unsigned long flags) @@ -393,17 +421,17 @@ static int ptrace_attach(struct task_struct *task, long request, bool seize = (request == PTRACE_SEIZE); int retval; - retval = -EIO; if (seize) { if (addr != 0) - goto out; + return -EIO; /* * This duplicates the check in check_ptrace_options() because * ptrace_attach() and ptrace_setoptions() have historically * used different error codes for unknown ptrace options. */ if (flags & ~(unsigned long)PTRACE_O_MASK) - goto out; + return -EIO; + retval = check_ptrace_options(flags); if (retval) return retval; @@ -414,88 +442,54 @@ static int ptrace_attach(struct task_struct *task, long request, audit_ptrace(task); - retval = -EPERM; if (unlikely(task->flags & PF_KTHREAD)) - goto out; + return -EPERM; if (same_thread_group(task, current)) - goto out; + return -EPERM; /* * Protect exec's credential calculations against our interference; * SUID, SGID and LSM creds get determined differently * under ptrace. */ - retval = -ERESTARTNOINTR; - if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) - goto out; + scoped_cond_guard (mutex_intr, return -ERESTARTNOINTR, + &task->signal->cred_guard_mutex) { - task_lock(task); - retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); - task_unlock(task); - if (retval) - goto unlock_creds; + scoped_guard (task_lock, task) { + retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); + if (retval) + return retval; + } - write_lock_irq(&tasklist_lock); - retval = -EPERM; - if (unlikely(task->exit_state)) - goto unlock_tasklist; - if (task->ptrace) - goto unlock_tasklist; + scoped_guard (write_lock_irq, &tasklist_lock) { + if (unlikely(task->exit_state)) + return -EPERM; + if (task->ptrace) + return -EPERM; - task->ptrace = flags; + task->ptrace = flags; - ptrace_link(task, current); + ptrace_link(task, current); - /* SEIZE doesn't trap tracee on attach */ - if (!seize) - send_sig_info(SIGSTOP, SEND_SIG_PRIV, task); + /* SEIZE doesn't trap tracee on attach */ + if (!seize) + send_sig_info(SIGSTOP, SEND_SIG_PRIV, task); - spin_lock(&task->sighand->siglock); + ptrace_set_stopped(task); + } + } /* - * If the task is already STOPPED, set JOBCTL_TRAP_STOP and - * TRAPPING, and kick it so that it transits to TRACED. TRAPPING - * will be cleared if the child completes the transition or any - * event which clears the group stop states happens. We'll wait - * for the transition to complete before returning from this - * function. - * - * This hides STOPPED -> RUNNING -> TRACED transition from the - * attaching thread but a different thread in the same group can - * still observe the transient RUNNING state. IOW, if another - * thread's WNOHANG wait(2) on the stopped tracee races against - * ATTACH, the wait(2) may fail due to the transient RUNNING. - * - * The following task_is_stopped() test is safe as both transitions - * in and out of STOPPED are protected by siglock. + * We do not bother to change retval or clear JOBCTL_TRAPPING + * if wait_on_bit() was interrupted by SIGKILL. The tracer will + * not return to user-mode, it will exit and clear this bit in + * __ptrace_unlink() if it wasn't already cleared by the tracee; + * and until then nobody can ptrace this task. */ - if (task_is_stopped(task) && - task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) { - task->jobctl &= ~JOBCTL_STOPPED; - signal_wake_up_state(task, __TASK_STOPPED); - } - - spin_unlock(&task->sighand->siglock); - - retval = 0; -unlock_tasklist: - write_unlock_irq(&tasklist_lock); -unlock_creds: - mutex_unlock(&task->signal->cred_guard_mutex); -out: - if (!retval) { - /* - * We do not bother to change retval or clear JOBCTL_TRAPPING - * if wait_on_bit() was interrupted by SIGKILL. The tracer will - * not return to user-mode, it will exit and clear this bit in - * __ptrace_unlink() if it wasn't already cleared by the tracee; - * and until then nobody can ptrace this task. - */ - wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE); - proc_ptrace_connector(task, PTRACE_ATTACH); - } + wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE); + proc_ptrace_connector(task, PTRACE_ATTACH); - return retval; + return 0; } /** -- cgit From a51749ab34d9e5dec548fe38ede7e01e8bb26454 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 30 Nov 2023 21:48:17 +0100 Subject: locking/mutex: Document that mutex_unlock() is non-atomic I have seen several cases of attempts to use mutex_unlock() to release an object such that the object can then be freed by another task. This is not safe because mutex_unlock(), in the MUTEX_FLAG_WAITERS && !MUTEX_FLAG_HANDOFF case, accesses the mutex structure after having marked it as unlocked; so mutex_unlock() requires its caller to ensure that the mutex stays alive until mutex_unlock() returns. If MUTEX_FLAG_WAITERS is set and there are real waiters, those waiters have to keep the mutex alive, but we could have a spurious MUTEX_FLAG_WAITERS left if an interruptible/killable waiter bailed between the points where __mutex_unlock_slowpath() did the cmpxchg reading the flags and where it acquired the wait_lock. ( With spinlocks, that kind of code pattern is allowed and, from what I remember, used in several places in the kernel. ) Document this, such a semantic difference between mutexes and spinlocks is fairly unintuitive. [ mingo: Made the changelog a bit more assertive, refined the comments. ] Signed-off-by: Jann Horn Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231130204817.2031407-1-jannh@google.com --- Documentation/locking/mutex-design.rst | 6 ++++++ kernel/locking/mutex.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/Documentation/locking/mutex-design.rst b/Documentation/locking/mutex-design.rst index 78540cd7f54b..7572339b2f12 100644 --- a/Documentation/locking/mutex-design.rst +++ b/Documentation/locking/mutex-design.rst @@ -101,6 +101,12 @@ features that make lock debugging easier and faster: - Detects multi-task circular deadlocks and prints out all affected locks and tasks (and only those tasks). +Releasing a mutex is not an atomic operation: Once a mutex release operation +has begun, another context may be able to acquire the mutex before the release +operation has fully completed. The mutex user must ensure that the mutex is not +destroyed while a release operation is still in progress - in other words, +callers of mutex_unlock() must ensure that the mutex stays alive until +mutex_unlock() has returned. Interfaces ---------- diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 2deeeca3e71b..cbae8c0b89ab 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -532,6 +532,11 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne * This function must not be used in interrupt context. Unlocking * of a not locked mutex is not allowed. * + * The caller must ensure that the mutex stays alive until this function has + * returned - mutex_unlock() can NOT directly be used to release an object such + * that another concurrent task can free it. + * Mutexes are different from spinlocks & refcounts in this aspect. + * * This function is similar to (but not equivalent to) up(). */ void __sched mutex_unlock(struct mutex *lock) -- cgit From 2b9d9e0a9ba0e24cb9c78336481f0ed8b2bc1ff2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jan 2024 09:31:16 +0100 Subject: locking/mutex: Clarify that mutex_unlock(), and most other sleeping locks, can still use the lock object after it's unlocked Clarify the mutex lock lifetime rules a bit more. Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Jann Horn Cc: Linus Torvalds Link: https://lore.kernel.org/r/20231201121808.GL3818@noisy.programming.kicks-ass.net --- Documentation/locking/mutex-design.rst | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/Documentation/locking/mutex-design.rst b/Documentation/locking/mutex-design.rst index 7572339b2f12..7c30b4aa5e28 100644 --- a/Documentation/locking/mutex-design.rst +++ b/Documentation/locking/mutex-design.rst @@ -101,12 +101,24 @@ features that make lock debugging easier and faster: - Detects multi-task circular deadlocks and prints out all affected locks and tasks (and only those tasks). -Releasing a mutex is not an atomic operation: Once a mutex release operation -has begun, another context may be able to acquire the mutex before the release -operation has fully completed. The mutex user must ensure that the mutex is not -destroyed while a release operation is still in progress - in other words, -callers of mutex_unlock() must ensure that the mutex stays alive until -mutex_unlock() has returned. +Mutexes - and most other sleeping locks like rwsems - do not provide an +implicit reference for the memory they occupy, which reference is released +with mutex_unlock(). + +[ This is in contrast with spin_unlock() [or completion_done()], which + APIs can be used to guarantee that the memory is not touched by the + lock implementation after spin_unlock()/completion_done() releases + the lock. ] + +mutex_unlock() may access the mutex structure even after it has internally +released the lock already - so it's not safe for another context to +acquire the mutex and assume that the mutex_unlock() context is not using +the structure anymore. + +The mutex user must ensure that the mutex is not destroyed while a +release operation is still in progress - in other words, callers of +mutex_unlock() must ensure that the mutex stays alive until mutex_unlock() +has returned. Interfaces ---------- -- cgit