From 4624a14f4daa8ab4578d274555fd8847254ce339 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 14 Feb 2022 16:52:12 +0000 Subject: sched/preempt: Simplify irqentry_exit_cond_resched() callers Currently callers of irqentry_exit_cond_resched() need to be aware of whether the function should be indirected via a static call, leading to ugly ifdeffery in callers. Save them the hassle with a static inline wrapper that does the right thing. The raw_irqentry_exit_cond_resched() will also be useful in subsequent patches which will add conditional wrappers for preemption functions. Note: in arch/x86/entry/common.c, xen_pv_evtchn_do_upcall() always calls irqentry_exit_cond_resched() directly, even when PREEMPT_DYNAMIC is in use. I believe this is a latent bug (which this patch corrects), but I'm not entirely certain this wasn't deliberate. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ard Biesheuvel Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20220214165216.2231574-4-mark.rutland@arm.com --- kernel/entry/common.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'kernel/entry/common.c') diff --git a/kernel/entry/common.c b/kernel/entry/common.c index bad713684c2e..1739ca79613b 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -380,7 +380,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) return ret; } -void irqentry_exit_cond_resched(void) +void raw_irqentry_exit_cond_resched(void) { if (!preempt_count()) { /* Sanity check RCU and thread stack */ @@ -392,7 +392,7 @@ void irqentry_exit_cond_resched(void) } } #ifdef CONFIG_PREEMPT_DYNAMIC -DEFINE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched); +DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); #endif noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) @@ -420,13 +420,9 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) } instrumentation_begin(); - if (IS_ENABLED(CONFIG_PREEMPTION)) { -#ifdef CONFIG_PREEMPT_DYNAMIC - static_call(irqentry_exit_cond_resched)(); -#else + if (IS_ENABLED(CONFIG_PREEMPTION)) irqentry_exit_cond_resched(); -#endif - } + /* Covers both tracing and lockdep */ trace_hardirqs_on(); instrumentation_end(); -- cgit From 99cf983cc8bca4adb461b519664c939a565cfd4d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 14 Feb 2022 16:52:14 +0000 Subject: sched/preempt: Add PREEMPT_DYNAMIC using static keys Where an architecture selects HAVE_STATIC_CALL but not HAVE_STATIC_CALL_INLINE, each static call has an out-of-line trampoline which will either branch to a callee or return to the caller. On such architectures, a number of constraints can conspire to make those trampolines more complicated and potentially less useful than we'd like. For example: * Hardware and software control flow integrity schemes can require the addition of "landing pad" instructions (e.g. `BTI` for arm64), which will also be present at the "real" callee. * Limited branch ranges can require that trampolines generate or load an address into a register and perform an indirect branch (or at least have a slow path that does so). This loses some of the benefits of having a direct branch. * Interaction with SW CFI schemes can be complicated and fragile, e.g. requiring that we can recognise idiomatic codegen and remove indirections understand, at least until clang proves more helpful mechanisms for dealing with this. For PREEMPT_DYNAMIC, we don't need the full power of static calls, as we really only need to enable/disable specific preemption functions. We can achieve the same effect without a number of the pain points above by using static keys to fold early returns into the preemption functions themselves rather than in an out-of-line trampoline, effectively inlining the trampoline into the start of the function. For arm64, this results in good code generation. For example, the dynamic_cond_resched() wrapper looks as follows when enabled. When disabled, the first `B` is replaced with a `NOP`, resulting in an early return. | : | bti c | b // or `nop` | mov w0, #0x0 | ret | mrs x0, sp_el0 | ldr x0, [x0, #8] | cbnz x0, | paciasp | stp x29, x30, [sp, #-16]! | mov x29, sp | bl | mov w0, #0x1 | ldp x29, x30, [sp], #16 | autiasp | ret ... compared to the regular form of the function: | <__cond_resched>: | bti c | mrs x0, sp_el0 | ldr x1, [x0, #8] | cbz x1, <__cond_resched+0x18> | mov w0, #0x0 | ret | paciasp | stp x29, x30, [sp, #-16]! | mov x29, sp | bl | mov w0, #0x1 | ldp x29, x30, [sp], #16 | autiasp | ret Any architecture which implements static keys should be able to use this to implement PREEMPT_DYNAMIC with similar cost to non-inlined static calls. Since this is likely to have greater overhead than (inlined) static calls, PREEMPT_DYNAMIC is only defaulted to enabled when HAVE_PREEMPT_DYNAMIC_CALL is selected. Signed-off-by: Mark Rutland Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ard Biesheuvel Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20220214165216.2231574-6-mark.rutland@arm.com --- arch/Kconfig | 36 ++++++++++++++++++++++++++--- arch/x86/Kconfig | 2 +- include/linux/entry-common.h | 10 ++++++-- include/linux/kernel.h | 7 +++++- include/linux/sched.h | 10 +++++++- kernel/Kconfig.preempt | 3 ++- kernel/entry/common.c | 11 +++++++++ kernel/sched/core.c | 54 ++++++++++++++++++++++++++++++++++++++++++-- 8 files changed, 122 insertions(+), 11 deletions(-) (limited to 'kernel/entry/common.c') diff --git a/arch/Kconfig b/arch/Kconfig index 601691f1570f..d544abd14c01 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1278,11 +1278,41 @@ config HAVE_STATIC_CALL_INLINE config HAVE_PREEMPT_DYNAMIC bool + +config HAVE_PREEMPT_DYNAMIC_CALL + bool depends on HAVE_STATIC_CALL + select HAVE_PREEMPT_DYNAMIC + help + An architecture should select this if it can handle the preemption + model being selected at boot time using static calls. + + Where an architecture selects HAVE_STATIC_CALL_INLINE, any call to a + preemption function will be patched directly. + + Where an architecture does not select HAVE_STATIC_CALL_INLINE, any + call to a preemption function will go through a trampoline, and the + trampoline will be patched. + + It is strongly advised to support inline static call to avoid any + overhead. + +config HAVE_PREEMPT_DYNAMIC_KEY + bool + depends on HAVE_ARCH_JUMP_LABEL && CC_HAS_ASM_GOTO + select HAVE_PREEMPT_DYNAMIC help - Select this if the architecture support boot time preempt setting - on top of static calls. It is strongly advised to support inline - static call to avoid any overhead. + An architecture should select this if it can handle the preemption + model being selected at boot time using static keys. + + Each preemption function will be given an early return based on a + static key. This should have slightly lower overhead than non-inline + static calls, as this effectively inlines each trampoline into the + start of its callee. This may avoid redundant work, and may + integrate better with CFI schemes. + + This will have greater overhead than using inline static calls as + the call to the preemption function cannot be entirely elided. config ARCH_WANT_LD_ORPHAN_WARN bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ebe8fc76949a..f13cfdfb30ce 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -245,7 +245,7 @@ config X86 select HAVE_STACK_VALIDATION if X86_64 select HAVE_STATIC_CALL select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION - select HAVE_PREEMPT_DYNAMIC + select HAVE_PREEMPT_DYNAMIC_CALL select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index dfd84c59b144..141952f4fee8 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -456,13 +456,19 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); */ void raw_irqentry_exit_cond_resched(void); #ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) #define irqentry_exit_cond_resched_dynamic_enabled raw_irqentry_exit_cond_resched #define irqentry_exit_cond_resched_dynamic_disabled NULL DECLARE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); #define irqentry_exit_cond_resched() static_call(irqentry_exit_cond_resched)() -#else -#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() +#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +void dynamic_irqentry_exit_cond_resched(void); +#define irqentry_exit_cond_resched() dynamic_irqentry_exit_cond_resched() #endif +#else /* CONFIG_PREEMPT_DYNAMIC */ +#define irqentry_exit_cond_resched() raw_irqentry_exit_cond_resched() +#endif /* CONFIG_PREEMPT_DYNAMIC */ /** * irqentry_exit - Handle return from exception that used irqentry_enter() diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 33f47a996513..a890428bcc1a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -99,7 +99,7 @@ struct user; extern int __cond_resched(void); # define might_resched() __cond_resched() -#elif defined(CONFIG_PREEMPT_DYNAMIC) +#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) extern int __cond_resched(void); @@ -110,6 +110,11 @@ static __always_inline void might_resched(void) static_call_mod(might_resched)(); } +#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) + +extern int dynamic_might_resched(void); +# define might_resched() dynamic_might_resched() + #else # define might_resched() do { } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 508b91d57470..de03ddeb064b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2020,7 +2020,7 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) extern int __cond_resched(void); -#ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) DECLARE_STATIC_CALL(cond_resched, __cond_resched); @@ -2029,6 +2029,14 @@ static __always_inline int _cond_resched(void) return static_call_mod(cond_resched)(); } +#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +extern int dynamic_cond_resched(void); + +static __always_inline int _cond_resched(void) +{ + return dynamic_cond_resched(); +} + #else static inline int _cond_resched(void) diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index ce77f0265660..c2f1fd95a821 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -96,8 +96,9 @@ config PREEMPTION config PREEMPT_DYNAMIC bool "Preemption behaviour defined on boot" depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT + select JUMP_LABEL if HAVE_PREEMPT_DYNAMIC_KEY select PREEMPT_BUILD - default y + default y if HAVE_PREEMPT_DYNAMIC_CALL help This option allows to define the preemption model on the kernel command line parameter and thus override the default preemption diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 1739ca79613b..b145249ad91a 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -392,7 +393,17 @@ void raw_irqentry_exit_cond_resched(void) } } #ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); +#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +void dynamic_irqentry_exit_cond_resched(void) +{ + if (!static_key_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) + return; + raw_irqentry_exit_cond_resched(); +} +#endif #endif noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 300c0454a2b8..9e65028189f4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -6484,21 +6485,31 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) */ if (likely(!preemptible())) return; - preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); EXPORT_SYMBOL(preempt_schedule); #ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) #ifndef preempt_schedule_dynamic_enabled #define preempt_schedule_dynamic_enabled preempt_schedule #define preempt_schedule_dynamic_disabled NULL #endif DEFINE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); EXPORT_STATIC_CALL_TRAMP(preempt_schedule); +#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule); +void __sched notrace dynamic_preempt_schedule(void) +{ + if (!static_branch_unlikely(&sk_dynamic_preempt_schedule)) + return; + preempt_schedule(); +} +NOKPROBE_SYMBOL(dynamic_preempt_schedule); +EXPORT_SYMBOL(dynamic_preempt_schedule); +#endif #endif - /** * preempt_schedule_notrace - preempt_schedule called by tracing @@ -6553,12 +6564,24 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) EXPORT_SYMBOL_GPL(preempt_schedule_notrace); #ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) #ifndef preempt_schedule_notrace_dynamic_enabled #define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace #define preempt_schedule_notrace_dynamic_disabled NULL #endif DEFINE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); EXPORT_STATIC_CALL_TRAMP(preempt_schedule_notrace); +#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +static DEFINE_STATIC_KEY_TRUE(sk_dynamic_preempt_schedule_notrace); +void __sched notrace dynamic_preempt_schedule_notrace(void) +{ + if (!static_branch_unlikely(&sk_dynamic_preempt_schedule_notrace)) + return; + preempt_schedule_notrace(); +} +NOKPROBE_SYMBOL(dynamic_preempt_schedule_notrace); +EXPORT_SYMBOL(dynamic_preempt_schedule_notrace); +#endif #endif #endif /* CONFIG_PREEMPTION */ @@ -8068,6 +8091,7 @@ EXPORT_SYMBOL(__cond_resched); #endif #ifdef CONFIG_PREEMPT_DYNAMIC +#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) #define cond_resched_dynamic_enabled __cond_resched #define cond_resched_dynamic_disabled ((void *)&__static_call_return0) DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched); @@ -8077,6 +8101,25 @@ EXPORT_STATIC_CALL_TRAMP(cond_resched); #define might_resched_dynamic_disabled ((void *)&__static_call_return0) DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched); EXPORT_STATIC_CALL_TRAMP(might_resched); +#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +static DEFINE_STATIC_KEY_FALSE(sk_dynamic_cond_resched); +int __sched dynamic_cond_resched(void) +{ + if (!static_branch_unlikely(&sk_dynamic_cond_resched)) + return 0; + return __cond_resched(); +} +EXPORT_SYMBOL(dynamic_cond_resched); + +static DEFINE_STATIC_KEY_FALSE(sk_dynamic_might_resched); +int __sched dynamic_might_resched(void) +{ + if (!static_branch_unlikely(&sk_dynamic_might_resched)) + return 0; + return __cond_resched(); +} +EXPORT_SYMBOL(dynamic_might_resched); +#endif #endif /* @@ -8206,8 +8249,15 @@ int sched_dynamic_mode(const char *str) return -EINVAL; } +#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) #define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled) #define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled) +#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY) +#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key) +#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key) +#else +#error "Unsupported PREEMPT_DYNAMIC mechanism" +#endif void sched_dynamic_update(int mode) { -- cgit From bf9ad37dc8a30cce22ae95d6c2ca6abf8731d305 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 14 Jul 2015 14:26:34 +0200 Subject: signal, x86: Delay calling signals in atomic on RT enabled kernels On x86_64 we must disable preemption before we enable interrupts for stack faults, int3 and debugging, because the current task is using a per CPU debug stack defined by the IST. If we schedule out, another task can come in and use the same stack and cause the stack to be corrupted and crash the kernel on return. When CONFIG_PREEMPT_RT is enabled, spinlock_t locks become sleeping, and one of these is the spin lock used in signal handling. Some of the debug code (int3) causes do_trap() to send a signal. This function calls a spinlock_t lock that has been converted to a sleeping lock. If this happens, the above issues with the corrupted stack is possible. Instead of calling the signal right away, for PREEMPT_RT and x86, the signal information is stored on the stacks task_struct and TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume code will send the signal when preemption is enabled. [ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT to ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ] [bigeasy: Add on 32bit as per Yang Shi, minor rewording. ] [ tglx: Use a config option ] Signed-off-by: Oleg Nesterov Signed-off-by: Steven Rostedt Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/Ygq5aBB/qMQw6aP5@linutronix.de --- arch/x86/Kconfig | 1 + include/linux/sched.h | 3 +++ kernel/Kconfig.preempt | 12 +++++++++++- kernel/entry/common.c | 14 ++++++++++++++ kernel/signal.c | 40 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 69 insertions(+), 1 deletion(-) (limited to 'kernel/entry/common.c') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9f5bd41bf660..d557ac29b6cd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -120,6 +120,7 @@ config X86 select ARCH_WANTS_NO_INSTR select ARCH_WANT_HUGE_PMD_SHARE select ARCH_WANT_LD_ORPHAN_WARN + select ARCH_WANTS_RT_DELAYED_SIGNALS select ARCH_WANTS_THP_SWAP if X86_64 select ARCH_HAS_PARANOID_L1D_FLUSH select BUILDTIME_TABLE_SORT diff --git a/include/linux/sched.h b/include/linux/sched.h index 75ba8aa60248..098e37fd770a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1087,6 +1087,9 @@ struct task_struct { /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; +#ifdef CONFIG_RT_DELAYED_SIGNALS + struct kernel_siginfo forced_info; +#endif unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index ce77f0265660..5644abd5f8a8 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -132,4 +132,14 @@ config SCHED_CORE which is the likely usage by Linux distributions, there should be no measurable impact on performance. - +config ARCH_WANTS_RT_DELAYED_SIGNALS + bool + help + This option is selected by architectures where raising signals + can happen in atomic contexts on PREEMPT_RT enabled kernels. This + option delays raising the signal until the return to user space + loop where it is also delivered. X86 requires this to deliver + signals from trap handlers which run on IST stacks. + +config RT_DELAYED_SIGNALS + def_bool PREEMPT_RT && ARCH_WANTS_RT_DELAYED_SIGNALS diff --git a/kernel/entry/common.c b/kernel/entry/common.c index bad713684c2e..0543a2c92f20 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -148,6 +148,18 @@ static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work) arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING); } +#ifdef CONFIG_RT_DELAYED_SIGNALS +static inline void raise_delayed_signal(void) +{ + if (unlikely(current->forced_info.si_signo)) { + force_sig_info(¤t->forced_info); + current->forced_info.si_signo = 0; + } +} +#else +static inline void raise_delayed_signal(void) { } +#endif + static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work) { @@ -162,6 +174,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, if (ti_work & _TIF_NEED_RESCHED) schedule(); + raise_delayed_signal(); + if (ti_work & _TIF_UPROBE) uprobe_notify_resume(regs); diff --git a/kernel/signal.c b/kernel/signal.c index 9b04631acde8..e93de6daa188 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1307,6 +1307,43 @@ enum sig_handler { HANDLER_EXIT, /* Only visible as the process exit code */ }; +/* + * On some archictectures, PREEMPT_RT has to delay sending a signal from a + * trap since it cannot enable preemption, and the signal code's + * spin_locks turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME + * which will send the signal on exit of the trap. + */ +#ifdef CONFIG_RT_DELAYED_SIGNALS +static inline bool force_sig_delayed(struct kernel_siginfo *info, + struct task_struct *t) +{ + if (!in_atomic()) + return false; + + if (WARN_ON_ONCE(t->forced_info.si_signo)) + return true; + + if (is_si_special(info)) { + WARN_ON_ONCE(info != SEND_SIG_PRIV); + t->forced_info.si_signo = info->si_signo; + t->forced_info.si_errno = 0; + t->forced_info.si_code = SI_KERNEL; + t->forced_info.si_pid = 0; + t->forced_info.si_uid = 0; + } else { + t->forced_info = *info; + } + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); + return true; +} +#else +static inline bool force_sig_delayed(struct kernel_siginfo *info, + struct task_struct *t) +{ + return false; +} +#endif + /* * Force a signal that the process can't ignore: if necessary * we unblock the signal and change any SIG_IGN to SIG_DFL. @@ -1327,6 +1364,9 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, struct k_sigaction *action; int sig = info->si_signo; + if (force_sig_delayed(info, t)) + return 0; + spin_lock_irqsave(&t->sighand->siglock, flags); action = &t->sighand->action[sig-1]; ignored = action->sa.sa_handler == SIG_IGN; -- cgit From 153474ba1a4aed0a7b797b4c2be8c35c7a4e57bd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 11:46:37 -0600 Subject: ptrace: Create ptrace_report_syscall_{entry,exit} in ptrace.h Rename tracehook_report_syscall_{entry,exit} to ptrace_report_syscall_{entry,exit} and place them in ptrace.h There is no longer any generic tracehook infractructure so make these ptrace specific functions ptrace specific. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-3-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- arch/Kconfig | 2 +- arch/alpha/kernel/ptrace.c | 5 ++-- arch/arc/kernel/ptrace.c | 5 ++-- arch/arm/kernel/ptrace.c | 5 ++-- arch/arm64/kernel/ptrace.c | 7 +++-- arch/csky/kernel/ptrace.c | 5 ++-- arch/h8300/kernel/ptrace.c | 5 ++-- arch/hexagon/kernel/traps.c | 6 ++--- arch/ia64/kernel/ptrace.c | 4 +-- arch/m68k/kernel/ptrace.c | 6 ++--- arch/microblaze/kernel/ptrace.c | 5 ++-- arch/mips/kernel/ptrace.c | 5 ++-- arch/nds32/include/asm/syscall.h | 2 +- arch/nds32/kernel/ptrace.c | 5 ++-- arch/nios2/kernel/ptrace.c | 5 ++-- arch/openrisc/kernel/ptrace.c | 5 ++-- arch/parisc/kernel/ptrace.c | 7 +++-- arch/powerpc/kernel/ptrace/ptrace.c | 8 +++--- arch/riscv/kernel/ptrace.c | 5 ++-- arch/sh/kernel/ptrace_32.c | 5 ++-- arch/sparc/kernel/ptrace_32.c | 5 ++-- arch/sparc/kernel/ptrace_64.c | 5 ++-- arch/um/kernel/ptrace.c | 5 ++-- arch/xtensa/kernel/ptrace.c | 5 ++-- include/asm-generic/syscall.h | 2 +- include/linux/entry-common.h | 6 ++--- include/linux/ptrace.h | 51 +++++++++++++++++++++++++++++++++++++ include/linux/tracehook.h | 51 ------------------------------------- include/uapi/linux/ptrace.h | 2 +- kernel/entry/common.c | 1 + 30 files changed, 109 insertions(+), 126 deletions(-) (limited to 'kernel/entry/common.c') diff --git a/arch/Kconfig b/arch/Kconfig index 678a80713b21..a517a949eb1d 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -217,7 +217,7 @@ config TRACE_IRQFLAGS_SUPPORT # asm/syscall.h supplying asm-generic/syscall.h interface # linux/regset.h user_regset interfaces # CORE_DUMP_USE_REGSET #define'd in linux/elf.h -# TIF_SYSCALL_TRACE calls tracehook_report_syscall_{entry,exit} +# TIF_SYSCALL_TRACE calls ptrace_report_syscall_{entry,exit} # TIF_NOTIFY_RESUME calls tracehook_notify_resume() # signal delivery calls tracehook_signal_handler() # diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index 8c43212ae38e..a1a239ea002d 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -323,7 +322,7 @@ asmlinkage unsigned long syscall_trace_enter(void) unsigned long ret = 0; struct pt_regs *regs = current_pt_regs(); if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(current_pt_regs())) + ptrace_report_syscall_entry(current_pt_regs())) ret = -1UL; audit_syscall_entry(regs->r0, regs->r16, regs->r17, regs->r18, regs->r19); return ret ?: current_pt_regs()->r0; @@ -334,5 +333,5 @@ syscall_trace_leave(void) { audit_syscall_exit(current_pt_regs()); if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(current_pt_regs(), 0); + ptrace_report_syscall_exit(current_pt_regs(), 0); } diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 883391977fdf..54b419ac8bda 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -4,7 +4,6 @@ */ #include -#include #include #include #include @@ -258,7 +257,7 @@ long arch_ptrace(struct task_struct *child, long request, asmlinkage int syscall_trace_entry(struct pt_regs *regs) { - if (tracehook_report_syscall_entry(regs)) + if (ptrace_report_syscall_entry(regs)) return ULONG_MAX; return regs->r8; @@ -266,5 +265,5 @@ asmlinkage int syscall_trace_entry(struct pt_regs *regs) asmlinkage void syscall_trace_exit(struct pt_regs *regs) { - tracehook_report_syscall_exit(regs, 0); + ptrace_report_syscall_exit(regs, 0); } diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index e5aa3237853d..bfe88c6e60d5 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -843,8 +842,8 @@ static void report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir) regs->ARM_ip = dir; if (dir == PTRACE_SYSCALL_EXIT) - tracehook_report_syscall_exit(regs, 0); - else if (tracehook_report_syscall_entry(regs)) + ptrace_report_syscall_exit(regs, 0); + else if (ptrace_report_syscall_entry(regs)) current_thread_info()->abi_syscall = -1; regs->ARM_ip = ip; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index b7845575f86f..230a47b9189e 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include @@ -1818,11 +1817,11 @@ static void report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir) regs->regs[regno] = dir; if (dir == PTRACE_SYSCALL_ENTER) { - if (tracehook_report_syscall_entry(regs)) + if (ptrace_report_syscall_entry(regs)) forget_syscall(regs); regs->regs[regno] = saved_reg; } else if (!test_thread_flag(TIF_SINGLESTEP)) { - tracehook_report_syscall_exit(regs, 0); + ptrace_report_syscall_exit(regs, 0); regs->regs[regno] = saved_reg; } else { regs->regs[regno] = saved_reg; @@ -1832,7 +1831,7 @@ static void report_syscall(struct pt_regs *regs, enum ptrace_syscall_dir dir) * tracer modifications to the registers may have rewound the * state machine. */ - tracehook_report_syscall_exit(regs, 1); + ptrace_report_syscall_exit(regs, 1); } } diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c index 1a5f54e0d272..0f7e7b653c72 100644 --- a/arch/csky/kernel/ptrace.c +++ b/arch/csky/kernel/ptrace.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -321,7 +320,7 @@ long arch_ptrace(struct task_struct *child, long request, asmlinkage int syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) - if (tracehook_report_syscall_entry(regs)) + if (ptrace_report_syscall_entry(regs)) return -1; if (secure_computing() == -1) @@ -339,7 +338,7 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) audit_syscall_exit(regs); if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, 0); + ptrace_report_syscall_exit(regs, 0); if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_exit(regs, syscall_get_return_value(current, regs)); diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c index a11db009d0ea..a9898b27b756 100644 --- a/arch/h8300/kernel/ptrace.c +++ b/arch/h8300/kernel/ptrace.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -174,7 +173,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) long ret = 0; if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) + ptrace_report_syscall_entry(regs)) /* * Tracing decided this syscall should not happen. * We'll return a bogus call number to get an ENOSYS @@ -196,5 +195,5 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); + ptrace_report_syscall_exit(regs, step); } diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 1240f038cce0..6447763ce5a9 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -348,7 +348,7 @@ void do_trap0(struct pt_regs *regs) /* allow strace to catch syscall args */ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs))) + ptrace_report_syscall_entry(regs))) return; /* return -ENOSYS somewhere? */ /* Interrupts should be re-enabled for syscall processing */ @@ -386,7 +386,7 @@ void do_trap0(struct pt_regs *regs) /* allow strace to get the syscall return state */ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACE))) - tracehook_report_syscall_exit(regs, 0); + ptrace_report_syscall_exit(regs, 0); break; case TRAP_DEBUG: diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 6a1439eaa050..6af64aae087d 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1217,7 +1217,7 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, struct pt_regs regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) - if (tracehook_report_syscall_entry(®s)) + if (ptrace_report_syscall_entry(®s)) return -ENOSYS; /* copy user rbs to kernel rbs */ @@ -1243,7 +1243,7 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(®s, step); + ptrace_report_syscall_exit(®s, step); /* copy user rbs to kernel rbs */ if (test_thread_flag(TIF_RESTORE_RSE)) diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index aa3a0b8d07e9..a0c99fe3118e 100644 --- a/arch/m68k/kernel/ptrace.c +++ b/arch/m68k/kernel/ptrace.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -282,13 +282,13 @@ asmlinkage int syscall_trace_enter(void) int ret = 0; if (test_thread_flag(TIF_SYSCALL_TRACE)) - ret = tracehook_report_syscall_entry(task_pt_regs(current)); + ret = ptrace_report_syscall_entry(task_pt_regs(current)); return ret; } asmlinkage void syscall_trace_leave(void) { if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(task_pt_regs(current), 0); + ptrace_report_syscall_exit(task_pt_regs(current), 0); } #endif /* CONFIG_COLDFIRE */ diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index badd286882ae..5234d0c1dcaa 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include @@ -140,7 +139,7 @@ asmlinkage unsigned long do_syscall_trace_enter(struct pt_regs *regs) secure_computing_strict(regs->r12); if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) + ptrace_report_syscall_entry(regs)) /* * Tracing decided this syscall should not happen. * We'll return a bogus call number to get an ENOSYS @@ -161,7 +160,7 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); + ptrace_report_syscall_exit(regs, step); } void ptrace_disable(struct task_struct *child) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index db7c5be1d4a3..567aec4abac0 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -1317,7 +1316,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) current_thread_info()->syscall = syscall; if (test_thread_flag(TIF_SYSCALL_TRACE)) { - if (tracehook_report_syscall_entry(regs)) + if (ptrace_report_syscall_entry(regs)) return -1; syscall = current_thread_info()->syscall; } @@ -1376,7 +1375,7 @@ asmlinkage void syscall_trace_leave(struct pt_regs *regs) trace_sys_exit(regs, regs_return_value(regs)); if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, 0); + ptrace_report_syscall_exit(regs, 0); user_enter(); } diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h index 90aa56c94af1..04d55ce18d50 100644 --- a/arch/nds32/include/asm/syscall.h +++ b/arch/nds32/include/asm/syscall.h @@ -39,7 +39,7 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs) * * It's only valid to call this when @task is stopped for system * call exit tracing (due to TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT), - * after tracehook_report_syscall_entry() returned nonzero to prevent + * after ptrace_report_syscall_entry() returned nonzero to prevent * the system call from taking place. * * This rolls back the register state in @regs so it's as if the diff --git a/arch/nds32/kernel/ptrace.c b/arch/nds32/kernel/ptrace.c index d0eda870fbc2..6a6988cf689d 100644 --- a/arch/nds32/kernel/ptrace.c +++ b/arch/nds32/kernel/ptrace.c @@ -3,7 +3,6 @@ #include #include -#include #include #include @@ -103,7 +102,7 @@ void user_disable_single_step(struct task_struct *child) asmlinkage int syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) { - if (tracehook_report_syscall_entry(regs)) + if (ptrace_report_syscall_entry(regs)) forget_syscall(regs); } return regs->syscallno; @@ -113,6 +112,6 @@ asmlinkage void syscall_trace_leave(struct pt_regs *regs) { int step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); + ptrace_report_syscall_exit(regs, step); } diff --git a/arch/nios2/kernel/ptrace.c b/arch/nios2/kernel/ptrace.c index a6ea9e1b4f61..cd62f310778b 100644 --- a/arch/nios2/kernel/ptrace.c +++ b/arch/nios2/kernel/ptrace.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -134,7 +133,7 @@ asmlinkage int do_syscall_trace_enter(void) int ret = 0; if (test_thread_flag(TIF_SYSCALL_TRACE)) - ret = tracehook_report_syscall_entry(task_pt_regs(current)); + ret = ptrace_report_syscall_entry(task_pt_regs(current)); return ret; } @@ -142,5 +141,5 @@ asmlinkage int do_syscall_trace_enter(void) asmlinkage void do_syscall_trace_exit(void) { if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(task_pt_regs(current), 0); + ptrace_report_syscall_exit(task_pt_regs(current), 0); } diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c index 4d60ae2a12fa..b971740fc2aa 100644 --- a/arch/openrisc/kernel/ptrace.c +++ b/arch/openrisc/kernel/ptrace.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -159,7 +158,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) long ret = 0; if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) + ptrace_report_syscall_entry(regs)) /* * Tracing decided this syscall should not happen. * We'll return a bogus call number to get an ENOSYS @@ -181,5 +180,5 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); + ptrace_report_syscall_exit(regs, step); } diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 65de6c4c9354..96ef6a6b66e5 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -316,7 +315,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, long do_syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) { - int rc = tracehook_report_syscall_entry(regs); + int rc = ptrace_report_syscall_entry(regs); /* * As tracesys_next does not set %r28 to -ENOSYS @@ -327,7 +326,7 @@ long do_syscall_trace_enter(struct pt_regs *regs) if (rc) { /* * A nonzero return code from - * tracehook_report_syscall_entry() tells us + * ptrace_report_syscall_entry() tells us * to prevent the syscall execution. Skip * the syscall call and the syscall restart handling. * @@ -381,7 +380,7 @@ void do_syscall_trace_exit(struct pt_regs *regs) #endif if (stepping || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, stepping); + ptrace_report_syscall_exit(regs, stepping); } diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index c43f77e2ac31..f394b0d6473f 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include #include @@ -263,12 +263,12 @@ long do_syscall_trace_enter(struct pt_regs *regs) flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); if (flags) { - int rc = tracehook_report_syscall_entry(regs); + int rc = ptrace_report_syscall_entry(regs); if (unlikely(flags & _TIF_SYSCALL_EMU)) { /* * A nonzero return code from - * tracehook_report_syscall_entry() tells us to prevent + * ptrace_report_syscall_entry() tells us to prevent * the syscall execution, but we are not going to * execute it anyway. * @@ -334,7 +334,7 @@ void do_syscall_trace_leave(struct pt_regs *regs) step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); + ptrace_report_syscall_exit(regs, step); } void __init pt_regs_check(void); diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index a89243730153..793c7da0554b 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -17,7 +17,6 @@ #include #include #include -#include #define CREATE_TRACE_POINTS #include @@ -241,7 +240,7 @@ long arch_ptrace(struct task_struct *child, long request, __visible int do_syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE)) - if (tracehook_report_syscall_entry(regs)) + if (ptrace_report_syscall_entry(regs)) return -1; /* @@ -266,7 +265,7 @@ __visible void do_syscall_trace_exit(struct pt_regs *regs) audit_syscall_exit(regs); if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, 0); + ptrace_report_syscall_exit(regs, 0); #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 5281685f6ad1..d417988d9770 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -456,7 +455,7 @@ long arch_ptrace(struct task_struct *child, long request, asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) { if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) { + ptrace_report_syscall_entry(regs)) { regs->regs[0] = -ENOSYS; return -1; } @@ -484,5 +483,5 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs) step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); + ptrace_report_syscall_exit(regs, step); } diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index 5318174a0268..e7db48acb838 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -439,9 +438,9 @@ asmlinkage int syscall_trace(struct pt_regs *regs, int syscall_exit_p) if (test_thread_flag(TIF_SYSCALL_TRACE)) { if (syscall_exit_p) - tracehook_report_syscall_exit(regs, 0); + ptrace_report_syscall_exit(regs, 0); else - ret = tracehook_report_syscall_entry(regs); + ret = ptrace_report_syscall_entry(regs); } return ret; diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 2b92155db8a5..86a7eb5c27ba 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -1095,7 +1094,7 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) user_exit(); if (test_thread_flag(TIF_SYSCALL_TRACE)) - ret = tracehook_report_syscall_entry(regs); + ret = ptrace_report_syscall_entry(regs); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->u_regs[UREG_G1]); @@ -1118,7 +1117,7 @@ asmlinkage void syscall_trace_leave(struct pt_regs *regs) trace_sys_exit(regs, regs->u_regs[UREG_I0]); if (test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, 0); + ptrace_report_syscall_exit(regs, 0); if (test_thread_flag(TIF_NOHZ)) user_enter(); diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index b425f47bddbb..bfaf6ab1ac03 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include @@ -135,7 +134,7 @@ int syscall_trace_enter(struct pt_regs *regs) if (!test_thread_flag(TIF_SYSCALL_TRACE)) return 0; - return tracehook_report_syscall_entry(regs); + return ptrace_report_syscall_entry(regs); } void syscall_trace_leave(struct pt_regs *regs) @@ -151,7 +150,7 @@ void syscall_trace_leave(struct pt_regs *regs) if (!test_thread_flag(TIF_SYSCALL_TRACE)) return; - tracehook_report_syscall_exit(regs, 0); + ptrace_report_syscall_exit(regs, 0); /* force do_signal() --> is_syscall() */ if (ptraced & PT_PTRACED) set_thread_flag(TIF_SIGPENDING); diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index bb3f4797d212..323c678a691f 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #define CREATE_TRACE_POINTS @@ -550,7 +549,7 @@ int do_syscall_trace_enter(struct pt_regs *regs) regs->areg[2] = -ENOSYS; if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) { + ptrace_report_syscall_entry(regs)) { regs->areg[2] = -ENOSYS; regs->syscall = NO_SYSCALL; return 0; @@ -583,5 +582,5 @@ void do_syscall_trace_leave(struct pt_regs *regs) step = test_thread_flag(TIF_SINGLESTEP); if (step || test_thread_flag(TIF_SYSCALL_TRACE)) - tracehook_report_syscall_exit(regs, step); + ptrace_report_syscall_exit(regs, step); } diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h index 81695eb02a12..5a80fe728dc8 100644 --- a/include/asm-generic/syscall.h +++ b/include/asm-generic/syscall.h @@ -44,7 +44,7 @@ int syscall_get_nr(struct task_struct *task, struct pt_regs *regs); * * It's only valid to call this when @task is stopped for system * call exit tracing (due to %SYSCALL_WORK_SYSCALL_TRACE or - * %SYSCALL_WORK_SYSCALL_AUDIT), after tracehook_report_syscall_entry() + * %SYSCALL_WORK_SYSCALL_AUDIT), after ptrace_report_syscall_entry() * returned nonzero to prevent the system call from taking place. * * This rolls back the register state in @regs so it's as if the diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 2e2b8d6140ed..a670e9fba7a9 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -3,7 +3,7 @@ #define __LINUX_ENTRYCOMMON_H #include -#include +#include #include #include #include @@ -95,7 +95,7 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs #ifndef arch_syscall_enter_tracehook static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs) { - return tracehook_report_syscall_entry(regs); + return ptrace_report_syscall_entry(regs); } #endif @@ -294,7 +294,7 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step); #ifndef arch_syscall_exit_tracehook static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) { - tracehook_report_syscall_exit(regs, step); + ptrace_report_syscall_exit(regs, step); } #endif diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 91b1074edb4c..5310f43e4762 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -440,4 +440,55 @@ static inline int ptrace_report_syscall(unsigned long message) current->ptrace_message = 0; return fatal_signal_pending(current); } + +/** + * ptrace_report_syscall_entry - task is about to attempt a system call + * @regs: user register state of current task + * + * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or + * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just + * entered the kernel for a system call. Full user register state is + * available here. Changing the values in @regs can affect the system + * call number and arguments to be tried. It is safe to block here, + * preventing the system call from beginning. + * + * Returns zero normally, or nonzero if the calling arch code should abort + * the system call. That must prevent normal entry so no system call is + * made. If @task ever returns to user mode after this, its register state + * is unspecified, but should be something harmless like an %ENOSYS error + * return. It should preserve enough information so that syscall_rollback() + * can work (see asm-generic/syscall.h). + * + * Called without locks, just after entering kernel mode. + */ +static inline __must_check int ptrace_report_syscall_entry( + struct pt_regs *regs) +{ + return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY); +} + +/** + * ptrace_report_syscall_exit - task has just finished a system call + * @regs: user register state of current task + * @step: nonzero if simulating single-step or block-step + * + * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when + * the current task has just finished an attempted system call. Full + * user register state is available here. It is safe to block here, + * preventing signals from being processed. + * + * If @step is nonzero, this report is also in lieu of the normal + * trap that would follow the system call instruction because + * user_enable_block_step() or user_enable_single_step() was used. + * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set. + * + * Called without locks, just before checking for pending signals. + */ +static inline void ptrace_report_syscall_exit(struct pt_regs *regs, int step) +{ + if (step) + user_single_step_report(regs); + else + ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT); +} #endif diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 998bc3863559..819e82ac09bd 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -52,57 +52,6 @@ struct linux_binprm; -/** - * tracehook_report_syscall_entry - task is about to attempt a system call - * @regs: user register state of current task - * - * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or - * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just - * entered the kernel for a system call. Full user register state is - * available here. Changing the values in @regs can affect the system - * call number and arguments to be tried. It is safe to block here, - * preventing the system call from beginning. - * - * Returns zero normally, or nonzero if the calling arch code should abort - * the system call. That must prevent normal entry so no system call is - * made. If @task ever returns to user mode after this, its register state - * is unspecified, but should be something harmless like an %ENOSYS error - * return. It should preserve enough information so that syscall_rollback() - * can work (see asm-generic/syscall.h). - * - * Called without locks, just after entering kernel mode. - */ -static inline __must_check int tracehook_report_syscall_entry( - struct pt_regs *regs) -{ - return ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_ENTRY); -} - -/** - * tracehook_report_syscall_exit - task has just finished a system call - * @regs: user register state of current task - * @step: nonzero if simulating single-step or block-step - * - * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when - * the current task has just finished an attempted system call. Full - * user register state is available here. It is safe to block here, - * preventing signals from being processed. - * - * If @step is nonzero, this report is also in lieu of the normal - * trap that would follow the system call instruction because - * user_enable_block_step() or user_enable_single_step() was used. - * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set. - * - * Called without locks, just before checking for pending signals. - */ -static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step) -{ - if (step) - user_single_step_report(regs); - else - ptrace_report_syscall(PTRACE_EVENTMSG_SYSCALL_EXIT); -} - /** * tracehook_signal_handler - signal handler setup is complete * @stepping: nonzero if debugger single-step or block-step in use diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index 3747bf816f9a..b7af92e07d1f 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -114,7 +114,7 @@ struct ptrace_rseq_configuration { /* * These values are stored in task->ptrace_message - * by tracehook_report_syscall_* to describe the current syscall-stop. + * by ptrace_report_syscall_* to describe the current syscall-stop. */ #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 diff --git a/kernel/entry/common.c b/kernel/entry/common.c index bad713684c2e..f52e57c4d6d8 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include -- cgit From 0cfcb2b9ef48bbcaf5d43b9f1893f63a938e8176 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 27 Jan 2022 12:00:55 -0600 Subject: ptrace: Remove arch_syscall_{enter,exit}_tracehook These functions are alwasy one-to-one wrappers around ptrace_report_syscall_entry and ptrace_report_syscall_exit. So directly call the functions they are wrapping instead. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-4-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- include/linux/entry-common.h | 43 ++----------------------------------------- kernel/entry/common.c | 4 ++-- 2 files changed, 4 insertions(+), 43 deletions(-) (limited to 'kernel/entry/common.c') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index a670e9fba7a9..9efbdda61f7a 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -79,26 +79,6 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs); static __always_inline void arch_check_user_regs(struct pt_regs *regs) {} #endif -/** - * arch_syscall_enter_tracehook - Wrapper around tracehook_report_syscall_entry() - * @regs: Pointer to currents pt_regs - * - * Returns: 0 on success or an error code to skip the syscall. - * - * Defaults to tracehook_report_syscall_entry(). Can be replaced by - * architecture specific code. - * - * Invoked from syscall_enter_from_user_mode() - */ -static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs); - -#ifndef arch_syscall_enter_tracehook -static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs) -{ - return ptrace_report_syscall_entry(regs); -} -#endif - /** * enter_from_user_mode - Establish state when coming from user mode * @@ -157,7 +137,7 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); * It handles the following work items: * * 1) syscall_work flag dependent invocations of - * arch_syscall_enter_tracehook(), __secure_computing(), trace_sys_enter() + * ptrace_report_syscall_entry(), __secure_computing(), trace_sys_enter() * 2) Invocation of audit_syscall_entry() */ long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); @@ -279,25 +259,6 @@ static __always_inline void arch_exit_to_user_mode(void) { } */ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal); -/** - * arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit() - * @regs: Pointer to currents pt_regs - * @step: Indicator for single step - * - * Defaults to tracehook_report_syscall_exit(). Can be replaced by - * architecture specific code. - * - * Invoked from syscall_exit_to_user_mode() - */ -static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step); - -#ifndef arch_syscall_exit_tracehook -static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step) -{ - ptrace_report_syscall_exit(regs, step); -} -#endif - /** * exit_to_user_mode - Fixup state when exiting to user mode * @@ -347,7 +308,7 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs); * - rseq syscall exit * - audit * - syscall tracing - * - tracehook (single stepping) + * - ptrace (single stepping) * * 2) Preparatory work * - Exit to user mode loop (common TIF handling). Invokes diff --git a/kernel/entry/common.c b/kernel/entry/common.c index f52e57c4d6d8..f0b1daa1e8da 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -59,7 +59,7 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, /* Handle ptrace */ if (work & (SYSCALL_WORK_SYSCALL_TRACE | SYSCALL_WORK_SYSCALL_EMU)) { - ret = arch_syscall_enter_tracehook(regs); + ret = ptrace_report_syscall_entry(regs); if (ret || (work & SYSCALL_WORK_SYSCALL_EMU)) return -1L; } @@ -253,7 +253,7 @@ static void syscall_exit_work(struct pt_regs *regs, unsigned long work) step = report_single_step(work); if (step || work & SYSCALL_WORK_SYSCALL_TRACE) - arch_syscall_exit_tracehook(regs, step); + ptrace_report_syscall_exit(regs, step); } /* -- cgit From 8ba62d37949e248c698c26e0d82d72fda5d33ebf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 09:51:14 -0600 Subject: task_work: Call tracehook_notify_signal from get_signal on all architectures Always handle TIF_NOTIFY_SIGNAL in get_signal. With commit 35d0b389f3b2 ("task_work: unconditionally run task_work from get_signal()") always calling task_work_run all of the work of tracehook_notify_signal is already happening except clearing TIF_NOTIFY_SIGNAL. Factor clear_notify_signal out of tracehook_notify_signal and use it in get_signal so that get_signal only needs one call of task_work_run. To keep the semantics in sync update xfer_to_guest_mode_work (which does not call get_signal) to call tracehook_notify_signal if either _TIF_SIGPENDING or _TIF_NOTIFY_SIGNAL. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-8-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- arch/s390/kernel/signal.c | 4 ++-- arch/x86/kernel/signal.c | 4 ++-- include/linux/entry-common.h | 2 +- include/linux/tracehook.h | 9 +++++++-- kernel/entry/common.c | 12 ++---------- kernel/entry/kvm.c | 2 +- kernel/signal.c | 14 +++----------- 7 files changed, 18 insertions(+), 29 deletions(-) (limited to 'kernel/entry/common.c') diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 307f5d99514d..ea9e5e8182cd 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -453,7 +453,7 @@ static void handle_signal(struct ksignal *ksig, sigset_t *oldset, * stack-frames in one go after that. */ -void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) +void arch_do_signal_or_restart(struct pt_regs *regs) { struct ksignal ksig; sigset_t *oldset = sigmask_to_save(); @@ -466,7 +466,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) current->thread.system_call = test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; - if (has_signal && get_signal(&ksig)) { + if (get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ if (current->thread.system_call) { regs->int_code = current->thread.system_call; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ec71e06ae364..de3d5b5724d8 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -861,11 +861,11 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake. */ -void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) +void arch_do_signal_or_restart(struct pt_regs *regs) { struct ksignal ksig; - if (has_signal && get_signal(&ksig)) { + if (get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ handle_signal(&ksig, regs); return; diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 9efbdda61f7a..3537fd25f14e 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -257,7 +257,7 @@ static __always_inline void arch_exit_to_user_mode(void) { } * * Invoked from exit_to_user_mode_loop(). */ -void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal); +void arch_do_signal_or_restart(struct pt_regs *regs); /** * exit_to_user_mode - Fixup state when exiting to user mode diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index fa834a22e86e..b44a7820c468 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -106,6 +106,12 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) rseq_handle_notify_resume(NULL, regs); } +static inline void clear_notify_signal(void) +{ + clear_thread_flag(TIF_NOTIFY_SIGNAL); + smp_mb__after_atomic(); +} + /* * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This * is currently used by TWA_SIGNAL based task_work, which requires breaking @@ -113,8 +119,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) */ static inline void tracehook_notify_signal(void) { - clear_thread_flag(TIF_NOTIFY_SIGNAL); - smp_mb__after_atomic(); + clear_notify_signal(); if (task_work_pending(current)) task_work_run(); } diff --git a/kernel/entry/common.c b/kernel/entry/common.c index f0b1daa1e8da..79eaf9b4b10d 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -139,15 +139,7 @@ void noinstr exit_to_user_mode(void) } /* Workaround to allow gradual conversion of architecture code */ -void __weak arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) { } - -static void handle_signal_work(struct pt_regs *regs, unsigned long ti_work) -{ - if (ti_work & _TIF_NOTIFY_SIGNAL) - tracehook_notify_signal(); - - arch_do_signal_or_restart(regs, ti_work & _TIF_SIGPENDING); -} +void __weak arch_do_signal_or_restart(struct pt_regs *regs) { } static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work) @@ -170,7 +162,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, klp_update_patch_state(current); if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) - handle_signal_work(regs, ti_work); + arch_do_signal_or_restart(regs); if (ti_work & _TIF_NOTIFY_RESUME) tracehook_notify_resume(regs); diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 96d476e06c77..cabf36a489e4 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -8,7 +8,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) do { int ret; - if (ti_work & _TIF_NOTIFY_SIGNAL) + if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) tracehook_notify_signal(); if (ti_work & _TIF_SIGPENDING) { diff --git a/kernel/signal.c b/kernel/signal.c index 3b4cf25fb9b3..8632b88982c9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2626,20 +2626,12 @@ bool get_signal(struct ksignal *ksig) struct signal_struct *signal = current->signal; int signr; + clear_notify_signal(); if (unlikely(task_work_pending(current))) task_work_run(); - /* - * For non-generic architectures, check for TIF_NOTIFY_SIGNAL so - * that the arch handlers don't all have to do it. If we get here - * without TIF_SIGPENDING, just exit after running signal work. - */ - if (!IS_ENABLED(CONFIG_GENERIC_ENTRY)) { - if (test_thread_flag(TIF_NOTIFY_SIGNAL)) - tracehook_notify_signal(); - if (!task_sigpending(current)) - return false; - } + if (!task_sigpending(current)) + return false; if (unlikely(uprobe_deny_signal())) return false; -- cgit From 03248addadf1a5ef0a03cbcd5ec905b49adb9658 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 9 Feb 2022 12:20:45 -0600 Subject: resume_user_mode: Move to resume_user_mode.h Move set_notify_resume and tracehook_notify_resume into resume_user_mode.h. While doing that rename tracehook_notify_resume to resume_user_mode_work. Update all of the places that included tracehook.h for these functions to include resume_user_mode.h instead. Update all of the callers of tracehook_notify_resume to call resume_user_mode_work. Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/20220309162454.123006-12-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- arch/Kconfig | 2 +- arch/alpha/kernel/signal.c | 4 +-- arch/arc/kernel/signal.c | 4 +-- arch/arm/kernel/signal.c | 4 +-- arch/arm64/kernel/signal.c | 4 +-- arch/csky/kernel/signal.c | 4 +-- arch/h8300/kernel/signal.c | 4 +-- arch/hexagon/kernel/process.c | 4 +-- arch/hexagon/kernel/signal.c | 1 - arch/ia64/kernel/process.c | 4 +-- arch/ia64/kernel/ptrace.c | 2 +- arch/ia64/kernel/signal.c | 1 - arch/m68k/kernel/signal.c | 4 +-- arch/microblaze/kernel/signal.c | 4 +-- arch/mips/kernel/signal.c | 4 +-- arch/nds32/kernel/signal.c | 4 +-- arch/nios2/kernel/signal.c | 4 +-- arch/openrisc/kernel/signal.c | 4 +-- arch/parisc/kernel/signal.c | 4 +-- arch/powerpc/kernel/signal.c | 4 +-- arch/riscv/kernel/signal.c | 4 +-- arch/sh/kernel/signal_32.c | 4 +-- arch/sparc/kernel/signal32.c | 1 - arch/sparc/kernel/signal_32.c | 4 +-- arch/sparc/kernel/signal_64.c | 4 +-- arch/um/kernel/process.c | 4 +-- arch/xtensa/kernel/signal.c | 4 +-- block/blk-cgroup.c | 2 +- include/linux/entry-kvm.h | 2 +- include/linux/resume_user_mode.h | 64 ++++++++++++++++++++++++++++++++++++++++ include/linux/tracehook.h | 51 -------------------------------- kernel/entry/common.c | 4 +-- kernel/entry/kvm.c | 2 +- kernel/task_work.c | 2 +- mm/memcontrol.c | 2 +- 35 files changed, 117 insertions(+), 107 deletions(-) create mode 100644 include/linux/resume_user_mode.h (limited to 'kernel/entry/common.c') diff --git a/arch/Kconfig b/arch/Kconfig index 6382520ef0a5..2e3979c3d66d 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -218,7 +218,7 @@ config TRACE_IRQFLAGS_SUPPORT # linux/regset.h user_regset interfaces # CORE_DUMP_USE_REGSET #define'd in linux/elf.h # TIF_SYSCALL_TRACE calls ptrace_report_syscall_{entry,exit} -# TIF_NOTIFY_RESUME calls tracehook_notify_resume() +# TIF_NOTIFY_RESUME calls resume_user_mode_work() # config HAVE_ARCH_TRACEHOOK bool diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index d8ed71d5bed3..6f47f256fe80 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -531,7 +531,7 @@ do_work_pending(struct pt_regs *regs, unsigned long thread_flags, do_signal(regs, r0, r19); r0 = 0; } else { - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } } local_irq_disable(); diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index cb2f88502baf..f748483628f2 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include @@ -438,5 +438,5 @@ void do_notify_resume(struct pt_regs *regs) * user mode */ if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index c532a6041066..459abc5d1819 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include @@ -627,7 +627,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) } else if (thread_flags & _TIF_UPROBE) { uprobe_notify_resume(regs); } else { - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } } local_irq_disable(); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index d8aaf4b6f432..413c51de9d10 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -941,7 +941,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) do_signal(regs); if (thread_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); if (thread_flags & _TIF_FOREIGN_FPSTATE) fpsimd_restore_current_state(); diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index c7b763d2f526..7a3149a27e4d 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -265,5 +265,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index 75a1c36b105a..0716fc8a8ce2 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include @@ -283,5 +283,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags) do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 232dfd8956aa..ae3f728eeca0 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include /* * Program thread launch. Often defined as a macro in processor.h, @@ -178,7 +178,7 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags) } if (thread_info_flags & _TIF_NOTIFY_RESUME) { - tracehook_notify_resume(regs); + resume_user_mode_work(regs); return 1; } diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c index 94cc7ff52dce..bcba31e9e0ae 100644 --- a/arch/hexagon/kernel/signal.c +++ b/arch/hexagon/kernel/signal.c @@ -7,7 +7,6 @@ #include #include -#include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 834df24a88f1..d7a256bd9d6b 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include @@ -179,7 +179,7 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) if (test_thread_flag(TIF_NOTIFY_RESUME)) { local_irq_enable(); /* force interrupt enable */ - tracehook_notify_resume(&scr->pt); + resume_user_mode_work(&scr->pt); } /* copy user rbs to kernel rbs */ diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 6af64aae087d..a19acd9f5e1f 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index c1b299760bf7..51cf6a7ec158 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 338817d0cb3f..49533f65958a 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include @@ -1109,5 +1109,5 @@ void do_notify_resume(struct pt_regs *regs) do_signal(regs); if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 23e8a9336a29..561eb82d7af6 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include @@ -311,5 +311,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, int in_syscall) do_signal(regs, in_syscall); if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 5bce782e694c..1a99f26bf99f 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include @@ -916,7 +916,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); user_enter(); } diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c index 7e3ca430a223..551caef595cb 100644 --- a/arch/nds32/kernel/signal.c +++ b/arch/nds32/kernel/signal.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include @@ -380,5 +380,5 @@ do_notify_resume(struct pt_regs *regs, unsigned int thread_flags) do_signal(regs); if (thread_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c index 2009ae2d3c3b..530b60c99545 100644 --- a/arch/nios2/kernel/signal.c +++ b/arch/nios2/kernel/signal.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include @@ -319,7 +319,7 @@ asmlinkage int do_notify_resume(struct pt_regs *regs) return restart; } } else if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); return 0; } diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c index 92c5b70740f5..80f69740c731 100644 --- a/arch/openrisc/kernel/signal.c +++ b/arch/openrisc/kernel/signal.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include @@ -309,7 +309,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) } syscall = 0; } else { - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } } local_irq_disable(); diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 46b1050640b8..2f7ebe9add20 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include @@ -602,5 +602,5 @@ void do_notify_resume(struct pt_regs *regs, long in_syscall) do_signal(regs, in_syscall); if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index b93b87df499d..f7f8620663c7 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -9,7 +9,7 @@ * this archive for more details. */ -#include +#include #include #include #include @@ -294,7 +294,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) } if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } static unsigned long get_tm_stackpointer(struct task_struct *tsk) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index c2d5ecbe5526..d80bf5896c6f 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include @@ -317,5 +317,5 @@ asmlinkage __visible void do_notify_resume(struct pt_regs *regs, do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index dd3092911efa..90f495d35db2 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -503,5 +503,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0, do_signal(regs, save_r0); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 6cc124a3bb98..f9fe502b81c6 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index ffab16369bea..80c89b362d8b 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -19,7 +19,7 @@ #include #include /* do_coredum */ #include -#include +#include #include #include @@ -524,7 +524,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs, orig_i0); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } asmlinkage int do_sys_sigstack(struct sigstack __user *ssptr, diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 2a78d2af1265..8b9fc76cd3e0 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -552,7 +552,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) do_signal(regs, orig_i0); if (thread_info_flags & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); user_enter(); } diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 4a420778ed87..80504680be08 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include @@ -104,7 +104,7 @@ void interrupt_end(void) test_thread_flag(TIF_NOTIFY_SIGNAL)) do_signal(regs); if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } int get_current_pid(void) diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index f6c949895b3e..6f68649e86ba 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -511,5 +511,5 @@ void do_notify_resume(struct pt_regs *regs) do_signal(regs); if (test_thread_flag(TIF_NOTIFY_RESUME)) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); } diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 650f7e27989f..4d8be1634bc6 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include "blk.h" diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 07c878d6e323..6813171afccb 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -3,7 +3,7 @@ #define __LINUX_ENTRYKVM_H #include -#include +#include #include #include #include diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h new file mode 100644 index 000000000000..285189454449 --- /dev/null +++ b/include/linux/resume_user_mode.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef LINUX_RESUME_USER_MODE_H +#define LINUX_RESUME_USER_MODE_H + +#include +#include +#include +#include + +/** + * set_notify_resume - cause resume_user_mode_work() to be called + * @task: task that will call resume_user_mode_work() + * + * Calling this arranges that @task will call resume_user_mode_work() + * before returning to user mode. If it's already running in user mode, + * it will enter the kernel and call resume_user_mode_work() soon. + * If it's blocked, it will not be woken. + */ +static inline void set_notify_resume(struct task_struct *task) +{ + if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME)) + kick_process(task); +} + + +/** + * resume_user_mode_work - Perform work before returning to user mode + * @regs: user-mode registers of @current task + * + * This is called when %TIF_NOTIFY_RESUME has been set. Now we are + * about to return to user mode, and the user state in @regs can be + * inspected or adjusted. The caller in arch code has cleared + * %TIF_NOTIFY_RESUME before the call. If the flag gets set again + * asynchronously, this will be called again before we return to + * user mode. + * + * Called without locks. + */ +static inline void resume_user_mode_work(struct pt_regs *regs) +{ + clear_thread_flag(TIF_NOTIFY_RESUME); + /* + * This barrier pairs with task_work_add()->set_notify_resume() after + * hlist_add_head(task->task_works); + */ + smp_mb__after_atomic(); + if (unlikely(task_work_pending(current))) + task_work_run(); + +#ifdef CONFIG_KEYS_REQUEST_CACHE + if (unlikely(current->cached_requested_key)) { + key_put(current->cached_requested_key); + current->cached_requested_key = NULL; + } +#endif + + mem_cgroup_handle_over_high(); + blkcg_maybe_throttle_current(); + + rseq_handle_notify_resume(NULL, regs); +} + +#endif /* LINUX_RESUME_USER_MODE_H */ diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 946404ebe10b..9f6b3fd1880a 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -52,56 +52,5 @@ struct linux_binprm; -/** - * set_notify_resume - cause tracehook_notify_resume() to be called - * @task: task that will call tracehook_notify_resume() - * - * Calling this arranges that @task will call tracehook_notify_resume() - * before returning to user mode. If it's already running in user mode, - * it will enter the kernel and call tracehook_notify_resume() soon. - * If it's blocked, it will not be woken. - */ -static inline void set_notify_resume(struct task_struct *task) -{ - if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME)) - kick_process(task); -} - -/** - * tracehook_notify_resume - report when about to return to user mode - * @regs: user-mode registers of @current task - * - * This is called when %TIF_NOTIFY_RESUME has been set. Now we are - * about to return to user mode, and the user state in @regs can be - * inspected or adjusted. The caller in arch code has cleared - * %TIF_NOTIFY_RESUME before the call. If the flag gets set again - * asynchronously, this will be called again before we return to - * user mode. - * - * Called without locks. - */ -static inline void tracehook_notify_resume(struct pt_regs *regs) -{ - clear_thread_flag(TIF_NOTIFY_RESUME); - /* - * This barrier pairs with task_work_add()->set_notify_resume() after - * hlist_add_head(task->task_works); - */ - smp_mb__after_atomic(); - if (unlikely(task_work_pending(current))) - task_work_run(); - -#ifdef CONFIG_KEYS_REQUEST_CACHE - if (unlikely(current->cached_requested_key)) { - key_put(current->cached_requested_key); - current->cached_requested_key = NULL; - } -#endif - - mem_cgroup_handle_over_high(); - blkcg_maybe_throttle_current(); - - rseq_handle_notify_resume(NULL, regs); -} #endif /* */ diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 79eaf9b4b10d..a86823cad853 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -165,7 +165,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, arch_do_signal_or_restart(regs); if (ti_work & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(regs); + resume_user_mode_work(regs); /* Architecture specific TIF work */ arch_exit_to_user_mode_work(regs, ti_work); diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 3ab5f98988c3..9d09f489b60e 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -23,7 +23,7 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) schedule(); if (ti_work & _TIF_NOTIFY_RESUME) - tracehook_notify_resume(NULL); + resume_user_mode_work(NULL); ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work); if (ret) diff --git a/kernel/task_work.c b/kernel/task_work.c index cc6fccb0e24d..c59e1a49bc40 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include +#include static struct callback_head work_exited; /* all we need is ->next == NULL */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 09d342c7cbd0..2aaa400f34d6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -59,7 +59,7 @@ #include #include #include -#include +#include #include #include #include "internal.h" -- cgit From 7dd5ad2d3e82fb55229e3fe18e09160878e77e20 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Mar 2022 10:36:55 +0200 Subject: Revert "signal, x86: Delay calling signals in atomic on RT enabled kernels" Revert commit bf9ad37dc8a. It needs to be better encapsulated and generalized. Signed-off-by: Thomas Gleixner Cc: "Eric W. Biederman" Cc: Oleg Nesterov Cc: Sebastian Andrzej Siewior --- arch/x86/Kconfig | 1 - include/linux/sched.h | 3 --- kernel/Kconfig.preempt | 12 +----------- kernel/entry/common.c | 14 -------------- kernel/signal.c | 40 ---------------------------------------- 5 files changed, 1 insertion(+), 69 deletions(-) (limited to 'kernel/entry/common.c') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7340d9f01b62..442a426e8a68 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -122,7 +122,6 @@ config X86 select ARCH_WANT_GENERAL_HUGETLB select ARCH_WANT_HUGE_PMD_SHARE select ARCH_WANT_LD_ORPHAN_WARN - select ARCH_WANTS_RT_DELAYED_SIGNALS select ARCH_WANTS_THP_SWAP if X86_64 select ARCH_HAS_PARANOID_L1D_FLUSH select BUILDTIME_TABLE_SORT diff --git a/include/linux/sched.h b/include/linux/sched.h index 4a6fdd2a679f..d5e3c00b74e1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1090,9 +1090,6 @@ struct task_struct { /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; -#ifdef CONFIG_RT_DELAYED_SIGNALS - struct kernel_siginfo forced_info; -#endif unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 8c6de5a9ecc4..c2f1fd95a821 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -133,14 +133,4 @@ config SCHED_CORE which is the likely usage by Linux distributions, there should be no measurable impact on performance. -config ARCH_WANTS_RT_DELAYED_SIGNALS - bool - help - This option is selected by architectures where raising signals - can happen in atomic contexts on PREEMPT_RT enabled kernels. This - option delays raising the signal until the return to user space - loop where it is also delivered. X86 requires this to deliver - signals from trap handlers which run on IST stacks. - -config RT_DELAYED_SIGNALS - def_bool PREEMPT_RT && ARCH_WANTS_RT_DELAYED_SIGNALS + diff --git a/kernel/entry/common.c b/kernel/entry/common.c index ef8d94a98b7e..e57a224d6b79 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -142,18 +142,6 @@ void noinstr exit_to_user_mode(void) /* Workaround to allow gradual conversion of architecture code */ void __weak arch_do_signal_or_restart(struct pt_regs *regs) { } -#ifdef CONFIG_RT_DELAYED_SIGNALS -static inline void raise_delayed_signal(void) -{ - if (unlikely(current->forced_info.si_signo)) { - force_sig_info(¤t->forced_info); - current->forced_info.si_signo = 0; - } -} -#else -static inline void raise_delayed_signal(void) { } -#endif - static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, unsigned long ti_work) { @@ -168,8 +156,6 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, if (ti_work & _TIF_NEED_RESCHED) schedule(); - raise_delayed_signal(); - if (ti_work & _TIF_UPROBE) uprobe_notify_resume(regs); diff --git a/kernel/signal.c b/kernel/signal.c index 368a34c25bbf..30cd1ca43bcd 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1307,43 +1307,6 @@ enum sig_handler { HANDLER_EXIT, /* Only visible as the process exit code */ }; -/* - * On some archictectures, PREEMPT_RT has to delay sending a signal from a - * trap since it cannot enable preemption, and the signal code's - * spin_locks turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME - * which will send the signal on exit of the trap. - */ -#ifdef CONFIG_RT_DELAYED_SIGNALS -static inline bool force_sig_delayed(struct kernel_siginfo *info, - struct task_struct *t) -{ - if (!in_atomic()) - return false; - - if (WARN_ON_ONCE(t->forced_info.si_signo)) - return true; - - if (is_si_special(info)) { - WARN_ON_ONCE(info != SEND_SIG_PRIV); - t->forced_info.si_signo = info->si_signo; - t->forced_info.si_errno = 0; - t->forced_info.si_code = SI_KERNEL; - t->forced_info.si_pid = 0; - t->forced_info.si_uid = 0; - } else { - t->forced_info = *info; - } - set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); - return true; -} -#else -static inline bool force_sig_delayed(struct kernel_siginfo *info, - struct task_struct *t) -{ - return false; -} -#endif - /* * Force a signal that the process can't ignore: if necessary * we unblock the signal and change any SIG_IGN to SIG_DFL. @@ -1364,9 +1327,6 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, struct k_sigaction *action; int sig = info->si_signo; - if (force_sig_delayed(info, t)) - return 0; - spin_lock_irqsave(&t->sighand->siglock, flags); action = &t->sighand->action[sig-1]; ignored = action->sa.sa_handler == SIG_IGN; -- cgit From 0a70045ed8516dfcff4b5728557e1ef3fd017c53 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 30 Mar 2022 10:43:28 +0200 Subject: entry: Fix compile error in dynamic_irqentry_exit_cond_resched() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kernel/entry/common.c: In function ‘dynamic_irqentry_exit_cond_resched’: kernel/entry/common.c:409:14: error: implicit declaration of function ‘static_key_unlikely’; did you mean ‘static_key_enable’? [-Werror=implicit-function-declaration] 409 | if (!static_key_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) | ^~~~~~~~~~~~~~~~~~~ | static_key_enable static_key_unlikely() should be static_branch_unlikely(). Fixes: 99cf983cc8bca ("sched/preempt: Add PREEMPT_DYNAMIC using static keys") Signed-off-by: Sven Schnelle Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20220330084328.1805665-1-svens@linux.ibm.com --- kernel/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/entry/common.c') diff --git a/kernel/entry/common.c b/kernel/entry/common.c index e57a224d6b79..93c3b86e781c 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -392,7 +392,7 @@ DEFINE_STATIC_CALL(irqentry_exit_cond_resched, raw_irqentry_exit_cond_resched); DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); void dynamic_irqentry_exit_cond_resched(void) { - if (!static_key_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) + if (!static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) return; raw_irqentry_exit_cond_resched(); } -- cgit From 8b023accc8df70e72f7704d29fead7ca914d6837 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 14 Mar 2022 15:19:03 -0700 Subject: lockdep: Fix -Wunused-parameter for _THIS_IP_ While looking into a bug related to the compiler's handling of addresses of labels, I noticed some uses of _THIS_IP_ seemed unused in lockdep. Drive by cleanup. -Wunused-parameter: kernel/locking/lockdep.c:1383:22: warning: unused parameter 'ip' kernel/locking/lockdep.c:4246:48: warning: unused parameter 'ip' kernel/locking/lockdep.c:4844:19: warning: unused parameter 'ip' Signed-off-by: Nick Desaulniers Signed-off-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Link: https://lore.kernel.org/r/20220314221909.2027027-1-ndesaulniers@google.com --- arch/arm64/kernel/entry-common.c | 8 ++++---- include/linux/irqflags.h | 4 ++-- include/linux/kvm_host.h | 2 +- kernel/entry/common.c | 6 +++--- kernel/locking/lockdep.c | 22 ++++++++-------------- kernel/sched/idle.c | 2 +- kernel/trace/trace_preemptirq.c | 4 ++-- 7 files changed, 21 insertions(+), 27 deletions(-) (limited to 'kernel/entry/common.c') diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 878c65aa7206..e04602ea4629 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -75,7 +75,7 @@ static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs) if (interrupts_enabled(regs)) { if (regs->exit_rcu) { trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); rcu_irq_exit(); lockdep_hardirqs_on(CALLER_ADDR0); return; @@ -121,7 +121,7 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs) static __always_inline void __exit_to_user_mode(void) { trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); user_enter_irqoff(); lockdep_hardirqs_on(CALLER_ADDR0); } @@ -179,7 +179,7 @@ static void noinstr arm64_exit_nmi(struct pt_regs *regs) ftrace_nmi_exit(); if (restore) { trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); } rcu_nmi_exit(); @@ -215,7 +215,7 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) if (restore) { trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); } rcu_nmi_exit(); diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 4b140938b03e..5ec0fa71399e 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -20,13 +20,13 @@ #ifdef CONFIG_PROVE_LOCKING extern void lockdep_softirqs_on(unsigned long ip); extern void lockdep_softirqs_off(unsigned long ip); - extern void lockdep_hardirqs_on_prepare(unsigned long ip); + extern void lockdep_hardirqs_on_prepare(void); extern void lockdep_hardirqs_on(unsigned long ip); extern void lockdep_hardirqs_off(unsigned long ip); #else static inline void lockdep_softirqs_on(unsigned long ip) { } static inline void lockdep_softirqs_off(unsigned long ip) { } - static inline void lockdep_hardirqs_on_prepare(unsigned long ip) { } + static inline void lockdep_hardirqs_on_prepare(void) { } static inline void lockdep_hardirqs_on(unsigned long ip) { } static inline void lockdep_hardirqs_off(unsigned long ip) { } #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3f9b22c4983a..e1b2413750c0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -450,7 +450,7 @@ static __always_inline void guest_state_enter_irqoff(void) { instrumentation_begin(); trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); instrumentation_end(); guest_context_enter_irqoff(); diff --git a/kernel/entry/common.c b/kernel/entry/common.c index e57a224d6b79..2eef7b22e02a 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -126,7 +126,7 @@ static __always_inline void __exit_to_user_mode(void) { instrumentation_begin(); trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); instrumentation_end(); user_enter_irqoff(); @@ -416,7 +416,7 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) instrumentation_begin(); /* Tell the tracer that IRET will enable interrupts */ trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); instrumentation_end(); rcu_irq_exit(); lockdep_hardirqs_on(CALLER_ADDR0); @@ -465,7 +465,7 @@ void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state) ftrace_nmi_exit(); if (irq_state.lockdep) { trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); } instrumentation_end(); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index c06cab6546ed..3cbd49216174 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1380,7 +1380,7 @@ static struct lock_list *alloc_list_entry(void) */ static int add_lock_to_list(struct lock_class *this, struct lock_class *links_to, struct list_head *head, - unsigned long ip, u16 distance, u8 dep, + u16 distance, u8 dep, const struct lock_trace *trace) { struct lock_list *entry; @@ -3133,19 +3133,15 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, * to the previous lock's dependency list: */ ret = add_lock_to_list(hlock_class(next), hlock_class(prev), - &hlock_class(prev)->locks_after, - next->acquire_ip, distance, - calc_dep(prev, next), - *trace); + &hlock_class(prev)->locks_after, distance, + calc_dep(prev, next), *trace); if (!ret) return 0; ret = add_lock_to_list(hlock_class(prev), hlock_class(next), - &hlock_class(next)->locks_before, - next->acquire_ip, distance, - calc_depb(prev, next), - *trace); + &hlock_class(next)->locks_before, distance, + calc_depb(prev, next), *trace); if (!ret) return 0; @@ -4236,14 +4232,13 @@ static void __trace_hardirqs_on_caller(void) /** * lockdep_hardirqs_on_prepare - Prepare for enabling interrupts - * @ip: Caller address * * Invoked before a possible transition to RCU idle from exit to user or * guest mode. This ensures that all RCU operations are done before RCU * stops watching. After the RCU transition lockdep_hardirqs_on() has to be * invoked to set the final state. */ -void lockdep_hardirqs_on_prepare(unsigned long ip) +void lockdep_hardirqs_on_prepare(void) { if (unlikely(!debug_locks)) return; @@ -4840,8 +4835,7 @@ EXPORT_SYMBOL_GPL(__lockdep_no_validate__); static void print_lock_nested_lock_not_held(struct task_struct *curr, - struct held_lock *hlock, - unsigned long ip) + struct held_lock *hlock) { if (!debug_locks_off()) return; @@ -5017,7 +5011,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, chain_key = iterate_chain_key(chain_key, hlock_id(hlock)); if (nest_lock && !__lock_is_held(nest_lock, -1)) { - print_lock_nested_lock_not_held(curr, hlock, ip); + print_lock_nested_lock_not_held(curr, hlock); return 0; } diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 8f8b5020e76a..3984dd21a5aa 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -102,7 +102,7 @@ void __cpuidle default_idle_call(void) * last -- this is very similar to the entry code. */ trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(_THIS_IP_); + lockdep_hardirqs_on_prepare(); rcu_idle_enter(); lockdep_hardirqs_on(_THIS_IP_); diff --git a/kernel/trace/trace_preemptirq.c b/kernel/trace/trace_preemptirq.c index f4938040c228..95b58bd757ce 100644 --- a/kernel/trace/trace_preemptirq.c +++ b/kernel/trace/trace_preemptirq.c @@ -46,7 +46,7 @@ void trace_hardirqs_on(void) this_cpu_write(tracing_irq_cpu, 0); } - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); lockdep_hardirqs_on(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_on); @@ -94,7 +94,7 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr) this_cpu_write(tracing_irq_cpu, 0); } - lockdep_hardirqs_on_prepare(CALLER_ADDR0); + lockdep_hardirqs_on_prepare(); lockdep_hardirqs_on(CALLER_ADDR0); } EXPORT_SYMBOL(trace_hardirqs_on_caller); -- cgit From 6d97af487dee3176cb1342d4ab16637e495440ad Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 4 May 2022 08:23:50 +0200 Subject: entry: Rename arch_check_user_regs() to arch_enter_from_user_mode() arch_check_user_regs() is used at the moment to verify that struct pt_regs contains valid values when entering the kernel from userspace. s390 needs a place in the generic entry code to modify a cpu data structure when switching from userspace to kernel mode. As arch_check_user_regs() is exactly this, rename it to arch_enter_from_user_mode(). When entering the kernel from userspace, arch_check_user_regs() is used to verify that struct pt_regs contains valid values. Note that the NMI codepath doesn't call this function. s390 needs a place in the generic entry code to modify a cpu data structure when switching from userspace to kernel mode. As arch_check_user_regs() is exactly this, rename it to arch_enter_from_user_mode(). Signed-off-by: Sven Schnelle Reviewed-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Link: https://lore.kernel.org/r/20220504062351.2954280-2-tmricht@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/include/asm/entry-common.h | 4 ++-- arch/x86/include/asm/entry-common.h | 4 ++-- include/linux/entry-common.h | 8 ++++---- kernel/entry/common.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'kernel/entry/common.c') diff --git a/arch/s390/include/asm/entry-common.h b/arch/s390/include/asm/entry-common.h index 2f0a1cacdf85..99d654ccd3db 100644 --- a/arch/s390/include/asm/entry-common.h +++ b/arch/s390/include/asm/entry-common.h @@ -15,12 +15,12 @@ void do_per_trap(struct pt_regs *regs); #ifdef CONFIG_DEBUG_ENTRY -static __always_inline void arch_check_user_regs(struct pt_regs *regs) +static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) { debug_user_asce(0); } -#define arch_check_user_regs arch_check_user_regs +#define arch_enter_from_user_mode arch_enter_from_user_mode #endif /* CONFIG_DEBUG_ENTRY */ static __always_inline void arch_exit_to_user_mode_work(struct pt_regs *regs, diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 43184640b579..674ed46d3ced 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -10,7 +10,7 @@ #include /* Check that the stack and regs on entry from user mode are sane. */ -static __always_inline void arch_check_user_regs(struct pt_regs *regs) +static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) { if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) { /* @@ -42,7 +42,7 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs) WARN_ON_ONCE(regs != task_pt_regs(current)); } } -#define arch_check_user_regs arch_check_user_regs +#define arch_enter_from_user_mode arch_enter_from_user_mode static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, unsigned long ti_work) diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index ab78bd4c2eb0..c92ac75d6556 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -63,7 +63,7 @@ ARCH_EXIT_TO_USER_MODE_WORK) /** - * arch_check_user_regs - Architecture specific sanity check for user mode regs + * arch_enter_from_user_mode - Architecture specific sanity check for user mode regs * @regs: Pointer to currents pt_regs * * Defaults to an empty implementation. Can be replaced by architecture @@ -73,10 +73,10 @@ * section. Use __always_inline so the compiler cannot push it out of line * and make it instrumentable. */ -static __always_inline void arch_check_user_regs(struct pt_regs *regs); +static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs); -#ifndef arch_check_user_regs -static __always_inline void arch_check_user_regs(struct pt_regs *regs) {} +#ifndef arch_enter_from_user_mode +static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs) {} #endif /** diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 93c3b86e781c..9e63923c5a0f 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -17,7 +17,7 @@ /* See comment for enter_from_user_mode() in entry-common.h */ static __always_inline void __enter_from_user_mode(struct pt_regs *regs) { - arch_check_user_regs(regs); + arch_enter_from_user_mode(regs); lockdep_hardirqs_off(CALLER_ADDR0); CT_WARN_ON(ct_state() != CONTEXT_USER); -- cgit