From c52198601695851622f361d3f16456e9fc857629 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 20 Mar 2023 17:55:13 -0700 Subject: locking/csd_lock: Add Kconfig option for csd_debug default The csd_debug kernel parameter works well, but is inconvenient in cases where it is more closely associated with boot loaders or automation than with a particular kernel version or release. Thererfore, provide a new CSD_LOCK_WAIT_DEBUG_DEFAULT Kconfig option that defaults csd_debug to 1 when selected and 0 otherwise, with this latter being the default. Signed-off-by: Paul E. McKenney Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juergen Gross Link: https://lore.kernel.org/r/20230321005516.50558-1-paulmck@kernel.org --- kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 06a413987a14..e2d558f5cef8 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -158,7 +158,7 @@ void __init call_function_init(void) #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG -static DEFINE_STATIC_KEY_FALSE(csdlock_debug_enabled); +static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled); static DEFINE_STATIC_KEY_FALSE(csdlock_debug_extended); static int __init csdlock_debug(char *str) -- cgit From 1771257cb447a7b27a15ed9aaf332726c47fcbcf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 20 Mar 2023 17:55:14 -0700 Subject: locking/csd_lock: Remove added data from CSD lock debugging The diagnostics added by this commit were extremely useful in one instance: a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging") However, they have not seen much action since, and there have been some concerns expressed that the complexity is not worth the benefit. Therefore, manually revert this commit, but leave a comment telling people where to find these diagnostics. [ paulmck: Apply Juergen Gross feedback. ] Signed-off-by: Paul E. McKenney Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juergen Gross Link: https://lore.kernel.org/r/20230321005516.50558-2-paulmck@kernel.org --- Documentation/admin-guide/kernel-parameters.txt | 4 - kernel/smp.c | 233 ++---------------------- 2 files changed, 12 insertions(+), 225 deletions(-) (limited to 'kernel/smp.c') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index ce70777f5999..b15198a85acb 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -896,10 +896,6 @@ to resolve the hang situation. The default value of this option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT Kconfig option. - 0: disable csdlock debugging - 1: enable basic csdlock debugging (minor impact) - ext: enable extended csdlock debugging (more impact, - but more data) dasd= [HW,NET] See header of drivers/s390/block/dasd_devmap.c. diff --git a/kernel/smp.c b/kernel/smp.c index e2d558f5cef8..038d666f327b 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -31,59 +31,8 @@ #define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK) -#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG -union cfd_seq_cnt { - u64 val; - struct { - u64 src:16; - u64 dst:16; -#define CFD_SEQ_NOCPU 0xffff - u64 type:4; -#define CFD_SEQ_QUEUE 0 -#define CFD_SEQ_IPI 1 -#define CFD_SEQ_NOIPI 2 -#define CFD_SEQ_PING 3 -#define CFD_SEQ_PINGED 4 -#define CFD_SEQ_HANDLE 5 -#define CFD_SEQ_DEQUEUE 6 -#define CFD_SEQ_IDLE 7 -#define CFD_SEQ_GOTIPI 8 -#define CFD_SEQ_HDLEND 9 - u64 cnt:28; - } u; -}; - -static char *seq_type[] = { - [CFD_SEQ_QUEUE] = "queue", - [CFD_SEQ_IPI] = "ipi", - [CFD_SEQ_NOIPI] = "noipi", - [CFD_SEQ_PING] = "ping", - [CFD_SEQ_PINGED] = "pinged", - [CFD_SEQ_HANDLE] = "handle", - [CFD_SEQ_DEQUEUE] = "dequeue (src CPU 0 == empty)", - [CFD_SEQ_IDLE] = "idle", - [CFD_SEQ_GOTIPI] = "gotipi", - [CFD_SEQ_HDLEND] = "hdlend (src CPU 0 == early)", -}; - -struct cfd_seq_local { - u64 ping; - u64 pinged; - u64 handle; - u64 dequeue; - u64 idle; - u64 gotipi; - u64 hdlend; -}; -#endif - struct cfd_percpu { call_single_data_t csd; -#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG - u64 seq_queue; - u64 seq_ipi; - u64 seq_noipi; -#endif }; struct call_function_data { @@ -159,18 +108,21 @@ void __init call_function_init(void) #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled); -static DEFINE_STATIC_KEY_FALSE(csdlock_debug_extended); +/* + * Parse the csdlock_debug= kernel boot parameter. + * + * If you need to restore the old "ext" value that once provided + * additional debugging information, reapply the following commits: + * + * de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging") + * a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging") + */ static int __init csdlock_debug(char *str) { unsigned int val = 0; - if (str && !strcmp(str, "ext")) { - val = 1; - static_branch_enable(&csdlock_debug_extended); - } else - get_option(&str, &val); - + get_option(&str, &val); if (val) static_branch_enable(&csdlock_debug_enabled); @@ -181,36 +133,11 @@ __setup("csdlock_debug=", csdlock_debug); static DEFINE_PER_CPU(call_single_data_t *, cur_csd); static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); static DEFINE_PER_CPU(void *, cur_csd_info); -static DEFINE_PER_CPU(struct cfd_seq_local, cfd_seq_local); static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */ module_param(csd_lock_timeout, ulong, 0444); static atomic_t csd_bug_count = ATOMIC_INIT(0); -static u64 cfd_seq; - -#define CFD_SEQ(s, d, t, c) \ - (union cfd_seq_cnt){ .u.src = s, .u.dst = d, .u.type = t, .u.cnt = c } - -static u64 cfd_seq_inc(unsigned int src, unsigned int dst, unsigned int type) -{ - union cfd_seq_cnt new, old; - - new = CFD_SEQ(src, dst, type, 0); - - do { - old.val = READ_ONCE(cfd_seq); - new.u.cnt = old.u.cnt + 1; - } while (cmpxchg(&cfd_seq, old.val, new.val) != old.val); - - return old.val; -} - -#define cfd_seq_store(var, src, dst, type) \ - do { \ - if (static_branch_unlikely(&csdlock_debug_extended)) \ - var = cfd_seq_inc(src, dst, type); \ - } while (0) /* Record current CSD work for current CPU, NULL to erase. */ static void __csd_lock_record(struct __call_single_data *csd) @@ -244,80 +171,6 @@ static int csd_lock_wait_getcpu(struct __call_single_data *csd) return -1; } -static void cfd_seq_data_add(u64 val, unsigned int src, unsigned int dst, - unsigned int type, union cfd_seq_cnt *data, - unsigned int *n_data, unsigned int now) -{ - union cfd_seq_cnt new[2]; - unsigned int i, j, k; - - new[0].val = val; - new[1] = CFD_SEQ(src, dst, type, new[0].u.cnt + 1); - - for (i = 0; i < 2; i++) { - if (new[i].u.cnt <= now) - new[i].u.cnt |= 0x80000000U; - for (j = 0; j < *n_data; j++) { - if (new[i].u.cnt == data[j].u.cnt) { - /* Direct read value trumps generated one. */ - if (i == 0) - data[j].val = new[i].val; - break; - } - if (new[i].u.cnt < data[j].u.cnt) { - for (k = *n_data; k > j; k--) - data[k].val = data[k - 1].val; - data[j].val = new[i].val; - (*n_data)++; - break; - } - } - if (j == *n_data) { - data[j].val = new[i].val; - (*n_data)++; - } - } -} - -static const char *csd_lock_get_type(unsigned int type) -{ - return (type >= ARRAY_SIZE(seq_type)) ? "?" : seq_type[type]; -} - -static void csd_lock_print_extended(struct __call_single_data *csd, int cpu) -{ - struct cfd_seq_local *seq = &per_cpu(cfd_seq_local, cpu); - unsigned int srccpu = csd->node.src; - struct call_function_data *cfd = per_cpu_ptr(&cfd_data, srccpu); - struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); - unsigned int now; - union cfd_seq_cnt data[2 * ARRAY_SIZE(seq_type)]; - unsigned int n_data = 0, i; - - data[0].val = READ_ONCE(cfd_seq); - now = data[0].u.cnt; - - cfd_seq_data_add(pcpu->seq_queue, srccpu, cpu, CFD_SEQ_QUEUE, data, &n_data, now); - cfd_seq_data_add(pcpu->seq_ipi, srccpu, cpu, CFD_SEQ_IPI, data, &n_data, now); - cfd_seq_data_add(pcpu->seq_noipi, srccpu, cpu, CFD_SEQ_NOIPI, data, &n_data, now); - - cfd_seq_data_add(per_cpu(cfd_seq_local.ping, srccpu), srccpu, CFD_SEQ_NOCPU, CFD_SEQ_PING, data, &n_data, now); - cfd_seq_data_add(per_cpu(cfd_seq_local.pinged, srccpu), srccpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED, data, &n_data, now); - - cfd_seq_data_add(seq->idle, CFD_SEQ_NOCPU, cpu, CFD_SEQ_IDLE, data, &n_data, now); - cfd_seq_data_add(seq->gotipi, CFD_SEQ_NOCPU, cpu, CFD_SEQ_GOTIPI, data, &n_data, now); - cfd_seq_data_add(seq->handle, CFD_SEQ_NOCPU, cpu, CFD_SEQ_HANDLE, data, &n_data, now); - cfd_seq_data_add(seq->dequeue, CFD_SEQ_NOCPU, cpu, CFD_SEQ_DEQUEUE, data, &n_data, now); - cfd_seq_data_add(seq->hdlend, CFD_SEQ_NOCPU, cpu, CFD_SEQ_HDLEND, data, &n_data, now); - - for (i = 0; i < n_data; i++) { - pr_alert("\tcsd: cnt(%07x): %04x->%04x %s\n", - data[i].u.cnt & ~0x80000000U, data[i].u.src, - data[i].u.dst, csd_lock_get_type(data[i].u.type)); - } - pr_alert("\tcsd: cnt now: %07x\n", now); -} - /* * Complain if too much time spent waiting. Note that only * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU, @@ -368,8 +221,6 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 * *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request"); } if (cpu >= 0) { - if (static_branch_unlikely(&csdlock_debug_extended)) - csd_lock_print_extended(csd, cpu); dump_cpu_task(cpu); if (!cpu_cur_csd) { pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu); @@ -412,27 +263,7 @@ static __always_inline void csd_lock_wait(struct __call_single_data *csd) smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK)); } - -static void __smp_call_single_queue_debug(int cpu, struct llist_node *node) -{ - unsigned int this_cpu = smp_processor_id(); - struct cfd_seq_local *seq = this_cpu_ptr(&cfd_seq_local); - struct call_function_data *cfd = this_cpu_ptr(&cfd_data); - struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); - - cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE); - if (llist_add(node, &per_cpu(call_single_queue, cpu))) { - cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI); - cfd_seq_store(seq->ping, this_cpu, cpu, CFD_SEQ_PING); - send_call_function_single_ipi(cpu); - cfd_seq_store(seq->pinged, this_cpu, cpu, CFD_SEQ_PINGED); - } else { - cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI); - } -} #else -#define cfd_seq_store(var, src, dst, type) - static void csd_lock_record(struct __call_single_data *csd) { } @@ -470,19 +301,6 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); void __smp_call_single_queue(int cpu, struct llist_node *node) { -#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG - if (static_branch_unlikely(&csdlock_debug_extended)) { - unsigned int type; - - type = CSD_TYPE(container_of(node, call_single_data_t, - node.llist)); - if (type == CSD_TYPE_SYNC || type == CSD_TYPE_ASYNC) { - __smp_call_single_queue_debug(cpu, node); - return; - } - } -#endif - /* * The list addition should be visible before sending the IPI * handler locks the list to pull the entry off it because of @@ -541,8 +359,6 @@ static int generic_exec_single(int cpu, struct __call_single_data *csd) */ void generic_smp_call_function_single_interrupt(void) { - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->gotipi, CFD_SEQ_NOCPU, - smp_processor_id(), CFD_SEQ_GOTIPI); __flush_smp_call_function_queue(true); } @@ -570,13 +386,7 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline) lockdep_assert_irqs_disabled(); head = this_cpu_ptr(&call_single_queue); - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->handle, CFD_SEQ_NOCPU, - smp_processor_id(), CFD_SEQ_HANDLE); entry = llist_del_all(head); - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->dequeue, - /* Special meaning of source cpu: 0 == queue empty */ - entry ? CFD_SEQ_NOCPU : 0, - smp_processor_id(), CFD_SEQ_DEQUEUE); entry = llist_reverse_order(entry); /* There shouldn't be any pending callbacks on an offline CPU. */ @@ -635,12 +445,8 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline) } } - if (!entry) { - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend, - 0, smp_processor_id(), - CFD_SEQ_HDLEND); + if (!entry) return; - } /* * Second; run all !SYNC callbacks. @@ -678,9 +484,6 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline) */ if (entry) sched_ttwu_pending(entry); - - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->hdlend, CFD_SEQ_NOCPU, - smp_processor_id(), CFD_SEQ_HDLEND); } @@ -704,8 +507,6 @@ void flush_smp_call_function_queue(void) if (llist_empty(this_cpu_ptr(&call_single_queue))) return; - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU, - smp_processor_id(), CFD_SEQ_IDLE); local_irq_save(flags); /* Get the already pending soft interrupts for RT enabled kernels */ was_pending = local_softirq_pending(); @@ -929,8 +730,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask, cpumask_clear(cfd->cpumask_ipi); for_each_cpu(cpu, cfd->cpumask) { - struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu); - call_single_data_t *csd = &pcpu->csd; + call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd; if (cond_func && !cond_func(cpu, info)) continue; @@ -944,20 +744,13 @@ static void smp_call_function_many_cond(const struct cpumask *mask, csd->node.src = smp_processor_id(); csd->node.dst = cpu; #endif - cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE); if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) { __cpumask_set_cpu(cpu, cfd->cpumask_ipi); nr_cpus++; last_cpu = cpu; - - cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI); - } else { - cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI); } } - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PING); - /* * Choose the most efficient way to send an IPI. Note that the * number of CPUs might be zero due to concurrent changes to the @@ -967,8 +760,6 @@ static void smp_call_function_many_cond(const struct cpumask *mask, send_call_function_single_ipi(last_cpu); else if (likely(nr_cpus > 1)) arch_send_call_function_ipi_mask(cfd->cpumask_ipi); - - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED); } if (run_local && (!cond_func || cond_func(this_cpu, info))) { -- cgit From 6366d062e7f97499409979f23f4107a6c45edb04 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 20 Mar 2023 17:55:15 -0700 Subject: locking/csd_lock: Remove per-CPU data indirection from CSD lock debugging The diagnostics added by this commit were extremely useful in one instance: a5aabace5fb8 ("locking/csd_lock: Add more data to CSD lock debugging") However, they have not seen much action since, and there have been some concerns expressed that the complexity is not worth the benefit. Therefore, manually revert the following commit preparatory commit: de7b09ef658d ("locking/csd_lock: Prepare more CSD lock debugging") Signed-off-by: Paul E. McKenney Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juergen Gross Link: https://lore.kernel.org/r/20230321005516.50558-3-paulmck@kernel.org --- kernel/smp.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 038d666f327b..7a85bcddd9dc 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -31,12 +31,8 @@ #define CSD_TYPE(_csd) ((_csd)->node.u_flags & CSD_FLAG_TYPE_MASK) -struct cfd_percpu { - call_single_data_t csd; -}; - struct call_function_data { - struct cfd_percpu __percpu *pcpu; + call_single_data_t __percpu *csd; cpumask_var_t cpumask; cpumask_var_t cpumask_ipi; }; @@ -59,8 +55,8 @@ int smpcfd_prepare_cpu(unsigned int cpu) free_cpumask_var(cfd->cpumask); return -ENOMEM; } - cfd->pcpu = alloc_percpu(struct cfd_percpu); - if (!cfd->pcpu) { + cfd->csd = alloc_percpu(call_single_data_t); + if (!cfd->csd) { free_cpumask_var(cfd->cpumask); free_cpumask_var(cfd->cpumask_ipi); return -ENOMEM; @@ -75,7 +71,7 @@ int smpcfd_dead_cpu(unsigned int cpu) free_cpumask_var(cfd->cpumask); free_cpumask_var(cfd->cpumask_ipi); - free_percpu(cfd->pcpu); + free_percpu(cfd->csd); return 0; } @@ -730,7 +726,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask, cpumask_clear(cfd->cpumask_ipi); for_each_cpu(cpu, cfd->cpumask) { - call_single_data_t *csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd; + call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu); if (cond_func && !cond_func(cpu, info)) continue; @@ -774,7 +770,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask, for_each_cpu(cpu, cfd->cpumask) { call_single_data_t *csd; - csd = &per_cpu_ptr(cfd->pcpu, cpu)->csd; + csd = per_cpu_ptr(cfd->csd, cpu); csd_lock_wait(csd); } } -- cgit From 203e435844734cfa503cd1755f35db2514db5cca Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 20 Mar 2023 17:55:16 -0700 Subject: kernel/smp: Make csdlock_debug= resettable It is currently possible to set the csdlock_debug_enabled static branch, but not to reset it. This is an issue when several different entities supply kernel boot parameters and also for kernels built with CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT=y. Therefore, make the csdlock_debug=0 kernel boot parameter turn off debugging. Last one wins! Reported-by: Jes Sorensen Signed-off-by: Paul E. McKenney Signed-off-by: Peter Zijlstra (Intel) Acked-by: Juergen Gross Link: https://lore.kernel.org/r/20230321005516.50558-4-paulmck@kernel.org --- Documentation/admin-guide/kernel-parameters.txt | 13 +++++++------ kernel/smp.c | 11 ++++++++--- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'kernel/smp.c') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b15198a85acb..5f2ec4b0f927 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -889,12 +889,13 @@ cs89x0_media= [HW,NET] Format: { rj45 | aui | bnc } - csdlock_debug= [KNL] Enable debug add-ons of cross-CPU function call - handling. When switched on, additional debug data is - printed to the console in case a hanging CPU is - detected, and that CPU is pinged again in order to try - to resolve the hang situation. The default value of - this option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT + csdlock_debug= [KNL] Enable or disable debug add-ons of cross-CPU + function call handling. When switched on, + additional debug data is printed to the console + in case a hanging CPU is detected, and that + CPU is pinged again in order to try to resolve + the hang situation. The default value of this + option depends on the CSD_LOCK_WAIT_DEBUG_DEFAULT Kconfig option. dasd= [HW,NET] diff --git a/kernel/smp.c b/kernel/smp.c index 7a85bcddd9dc..298ba7570621 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -116,11 +116,16 @@ static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug */ static int __init csdlock_debug(char *str) { + int ret; unsigned int val = 0; - get_option(&str, &val); - if (val) - static_branch_enable(&csdlock_debug_enabled); + ret = get_option(&str, &val); + if (ret) { + if (val) + static_branch_enable(&csdlock_debug_enabled); + else + static_branch_disable(&csdlock_debug_enabled); + } return 1; } -- cgit From cc9cb0a71725aa8dd8d8f534a9b562bbf7981f75 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 7 Mar 2023 14:35:53 +0000 Subject: sched, smp: Trace IPIs sent via send_call_function_single_ipi() send_call_function_single_ipi() is the thing that sends IPIs at the bottom of smp_call_function*() via either generic_exec_single() or smp_call_function_many_cond(). Give it an IPI-related tracepoint. Note that this ends up tracing any IPI sent via __smp_call_single_queue(), which covers __ttwu_queue_wakelist() and irq_work_queue_on() "for free". Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (Google) Acked-by: Ingo Molnar Link: https://lore.kernel.org/r/20230307143558.294354-3-vschneid@redhat.com --- arch/arm/kernel/smp.c | 1 - arch/arm64/kernel/smp.c | 1 - kernel/sched/core.c | 9 +++++++-- kernel/smp.c | 2 ++ 4 files changed, 9 insertions(+), 4 deletions(-) (limited to 'kernel/smp.c') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 0b8c25763adc..5edf09237b2f 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -48,7 +48,6 @@ #include #include -#define CREATE_TRACE_POINTS #include /* diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 4e8327264255..438c16fc4463 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -51,7 +51,6 @@ #include #include -#define CREATE_TRACE_POINTS #include DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 488655f2319f..c26a2cd99ec7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -80,6 +80,7 @@ #define CREATE_TRACE_POINTS #include #include +#include #undef CREATE_TRACE_POINTS #include "sched.h" @@ -95,6 +96,8 @@ #include "../../io_uring/io-wq.h" #include "../smpboot.h" +EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask); + /* * Export tracepoints that act as a bare tracehook (ie: have no trace event * associated with them) to allow external modules to probe them. @@ -3830,10 +3833,12 @@ void send_call_function_single_ipi(int cpu) { struct rq *rq = cpu_rq(cpu); - if (!set_nr_if_polling(rq->idle)) + if (!set_nr_if_polling(rq->idle)) { + trace_ipi_send_cpumask(cpumask_of(cpu), _RET_IP_, NULL); arch_send_call_function_single_ipi(cpu); - else + } else { trace_sched_wake_idle_without_ipi(cpu); + } } /* diff --git a/kernel/smp.c b/kernel/smp.c index 298ba7570621..770e879a7274 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -26,6 +26,8 @@ #include #include +#include + #include "smpboot.h" #include "sched/smp.h" -- cgit From 08407b5f61c1bbd4ebb26a76474df4354fd76fb7 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 7 Mar 2023 14:35:54 +0000 Subject: smp: Trace IPIs sent via arch_send_call_function_ipi_mask() This simply wraps around the arch function and prepends it with a tracepoint, similar to send_call_function_single_ipi(). Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20230307143558.294354-4-vschneid@redhat.com --- kernel/smp.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 770e879a7274..03e6d576295d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -103,6 +103,13 @@ void __init call_function_init(void) smpcfd_prepare_cpu(smp_processor_id()); } +static __always_inline void +send_call_function_ipi_mask(struct cpumask *mask) +{ + trace_ipi_send_cpumask(mask, _RET_IP_, NULL); + arch_send_call_function_ipi_mask(mask); +} + #ifdef CONFIG_CSD_LOCK_WAIT_DEBUG static DEFINE_STATIC_KEY_MAYBE(CONFIG_CSD_LOCK_WAIT_DEBUG_DEFAULT, csdlock_debug_enabled); @@ -762,7 +769,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask, if (nr_cpus == 1) send_call_function_single_ipi(last_cpu); else if (likely(nr_cpus > 1)) - arch_send_call_function_ipi_mask(cfd->cpumask_ipi); + send_call_function_ipi_mask(cfd->cpumask_ipi); } if (run_local && (!cond_func || cond_func(this_cpu, info))) { -- cgit From 253a0fb4c62827cdcaf43afcea5d675507eaf7a3 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 7 Mar 2023 14:35:57 +0000 Subject: smp: reword smp call IPI comment Accessing the call_single_queue hasn't involved a spinlock since 2014: 6897fc22ea01 ("kernel: use lockless list for smp_call_function_single") The llist operations (namely cmpxchg() and xchg()) provide similar ordering guarantees, update the comment to lessen confusion. Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230307143558.294354-7-vschneid@redhat.com --- kernel/smp.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 03e6d576295d..6bbfabbe62fc 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -312,9 +312,10 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); void __smp_call_single_queue(int cpu, struct llist_node *node) { /* - * The list addition should be visible before sending the IPI - * handler locks the list to pull the entry off it because of - * normal cache coherency rules implied by spinlocks. + * The list addition should be visible to the target CPU when it pops + * the head of the list to pull the entry off it in the IPI handler + * because of normal cache coherency rules implied by the underlying + * llist ops. * * If IPIs can go out of order to the cache coherency protocol * in an architecture, sufficient synchronisation should be added -- cgit From 68f4ff04dbada18dad79659c266a8e5e29e458cd Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Tue, 7 Mar 2023 14:35:58 +0000 Subject: sched, smp: Trace smp callback causing an IPI Context ======= The newly-introduced ipi_send_cpumask tracepoint has a "callback" parameter which so far has only been fed with NULL. While CSD_TYPE_SYNC/ASYNC and CSD_TYPE_IRQ_WORK share a similar backing struct layout (meaning their callback func can be accessed without caring about the actual CSD type), CSD_TYPE_TTWU doesn't even have a function attached to its struct. This means we need to check the type of a CSD before eventually dereferencing its associated callback. This isn't as trivial as it sounds: the CSD type is stored in __call_single_node.u_flags, which get cleared right before the callback is executed via csd_unlock(). This implies checking the CSD type before it is enqueued on the call_single_queue, as the target CPU's queue can be flushed before we get to sending an IPI. Furthermore, send_call_function_single_ipi() only has a CPU parameter, and would need to have an additional argument to trickle down the invoked function. This is somewhat silly, as the extra argument will always be pushed down to the function even when nothing is being traced, which is unnecessary overhead. Changes ======= send_call_function_single_ipi() is only used by smp.c, and is defined in sched/core.c as it contains scheduler-specific ops (set_nr_if_polling() of a CPU's idle task). Split it into two parts: the scheduler bits remain in sched/core.c, and the actual IPI emission is moved into smp.c. This lets us define an __always_inline helper function that can take the related callback as parameter without creating useless register pressure in the non-traced path which only gains a (disabled) static branch. Do the same thing for the multi IPI case. Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20230307143558.294354-8-vschneid@redhat.com --- kernel/sched/core.c | 18 +++++++++++------- kernel/sched/smp.h | 2 +- kernel/smp.c | 49 +++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 53 insertions(+), 16 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c26a2cd99ec7..b0a48cfc0a22 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3829,16 +3829,20 @@ void sched_ttwu_pending(void *arg) rq_unlock_irqrestore(rq, &rf); } -void send_call_function_single_ipi(int cpu) +/* + * Prepare the scene for sending an IPI for a remote smp_call + * + * Returns true if the caller can proceed with sending the IPI. + * Returns false otherwise. + */ +bool call_function_single_prep_ipi(int cpu) { - struct rq *rq = cpu_rq(cpu); - - if (!set_nr_if_polling(rq->idle)) { - trace_ipi_send_cpumask(cpumask_of(cpu), _RET_IP_, NULL); - arch_send_call_function_single_ipi(cpu); - } else { + if (set_nr_if_polling(cpu_rq(cpu)->idle)) { trace_sched_wake_idle_without_ipi(cpu); + return false; } + + return true; } /* diff --git a/kernel/sched/smp.h b/kernel/sched/smp.h index 2eb23dd0f285..21ac44428bb0 100644 --- a/kernel/sched/smp.h +++ b/kernel/sched/smp.h @@ -6,7 +6,7 @@ extern void sched_ttwu_pending(void *arg); -extern void send_call_function_single_ipi(int cpu); +extern bool call_function_single_prep_ipi(int cpu); #ifdef CONFIG_SMP extern void flush_smp_call_function_queue(void); diff --git a/kernel/smp.c b/kernel/smp.c index 6bbfabbe62fc..37e9613a0889 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -104,9 +104,18 @@ void __init call_function_init(void) } static __always_inline void -send_call_function_ipi_mask(struct cpumask *mask) +send_call_function_single_ipi(int cpu, smp_call_func_t func) { - trace_ipi_send_cpumask(mask, _RET_IP_, NULL); + if (call_function_single_prep_ipi(cpu)) { + trace_ipi_send_cpumask(cpumask_of(cpu), _RET_IP_, func); + arch_send_call_function_single_ipi(cpu); + } +} + +static __always_inline void +send_call_function_ipi_mask(struct cpumask *mask, smp_call_func_t func) +{ + trace_ipi_send_cpumask(mask, _RET_IP_, func); arch_send_call_function_ipi_mask(mask); } @@ -307,9 +316,8 @@ static __always_inline void csd_unlock(struct __call_single_data *csd) smp_store_release(&csd->node.u_flags, 0); } -static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); - -void __smp_call_single_queue(int cpu, struct llist_node *node) +static __always_inline void +raw_smp_call_single_queue(int cpu, struct llist_node *node, smp_call_func_t func) { /* * The list addition should be visible to the target CPU when it pops @@ -324,7 +332,32 @@ void __smp_call_single_queue(int cpu, struct llist_node *node) * equipped to do the right thing... */ if (llist_add(node, &per_cpu(call_single_queue, cpu))) - send_call_function_single_ipi(cpu); + send_call_function_single_ipi(cpu, func); +} + +static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); + +void __smp_call_single_queue(int cpu, struct llist_node *node) +{ + /* + * We have to check the type of the CSD before queueing it, because + * once queued it can have its flags cleared by + * flush_smp_call_function_queue() + * even if we haven't sent the smp_call IPI yet (e.g. the stopper + * executes migration_cpu_stop() on the remote CPU). + */ + if (trace_ipi_send_cpumask_enabled()) { + call_single_data_t *csd; + smp_call_func_t func; + + csd = container_of(node, call_single_data_t, node.llist); + func = CSD_TYPE(csd) == CSD_TYPE_TTWU ? + sched_ttwu_pending : csd->func; + + raw_smp_call_single_queue(cpu, node, func); + } else { + raw_smp_call_single_queue(cpu, node, NULL); + } } /* @@ -768,9 +801,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask, * provided mask. */ if (nr_cpus == 1) - send_call_function_single_ipi(last_cpu); + send_call_function_single_ipi(last_cpu, func); else if (likely(nr_cpus > 1)) - send_call_function_ipi_mask(cfd->cpumask_ipi); + send_call_function_ipi_mask(cfd->cpumask_ipi, func); } if (run_local && (!cond_func || cond_func(this_cpu, info))) { -- cgit From 68e2d17c9eb311ab59aeb6d0c38aad8985fa2596 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Mar 2023 11:28:36 +0100 Subject: trace: Add trace_ipi_send_cpu() Because copying cpumasks around when targeting a single CPU is a bit daft... Tested-and-reviewed-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230322103004.GA571242%40hirez.programming.kicks-ass.net --- include/linux/smp.h | 6 +++--- include/trace/events/ipi.h | 22 ++++++++++++++++++++++ kernel/irq_work.c | 6 ++---- kernel/sched/core.c | 1 + kernel/smp.c | 4 ++-- 5 files changed, 30 insertions(+), 9 deletions(-) (limited to 'kernel/smp.c') diff --git a/include/linux/smp.h b/include/linux/smp.h index c036a2228d8d..ed8f344ba627 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -130,9 +130,9 @@ extern void arch_smp_send_reschedule(int cpu); * scheduler_ipi() is inline so can't be passed as callback reason, but the * callsite IP should be sufficient for root-causing IPIs sent from here. */ -#define smp_send_reschedule(cpu) ({ \ - trace_ipi_send_cpumask(cpumask_of(cpu), _RET_IP_, NULL); \ - arch_smp_send_reschedule(cpu); \ +#define smp_send_reschedule(cpu) ({ \ + trace_ipi_send_cpu(cpu, _RET_IP_, NULL); \ + arch_smp_send_reschedule(cpu); \ }) /* diff --git a/include/trace/events/ipi.h b/include/trace/events/ipi.h index b1125dc27682..3de9bfc982ce 100644 --- a/include/trace/events/ipi.h +++ b/include/trace/events/ipi.h @@ -35,6 +35,28 @@ TRACE_EVENT(ipi_raise, TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason) ); +TRACE_EVENT(ipi_send_cpu, + + TP_PROTO(const unsigned int cpu, unsigned long callsite, void *callback), + + TP_ARGS(cpu, callsite, callback), + + TP_STRUCT__entry( + __field(unsigned int, cpu) + __field(void *, callsite) + __field(void *, callback) + ), + + TP_fast_assign( + __entry->cpu = cpu; + __entry->callsite = (void *)callsite; + __entry->callback = callback; + ), + + TP_printk("cpu=%u callsite=%pS callback=%pS", + __entry->cpu, __entry->callsite, __entry->callback) +); + TRACE_EVENT(ipi_send_cpumask, TP_PROTO(const struct cpumask *cpumask, unsigned long callsite, void *callback), diff --git a/kernel/irq_work.c b/kernel/irq_work.c index c33e88e32a67..2f4fb336dda1 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -78,10 +78,8 @@ void __weak arch_irq_work_raise(void) static __always_inline void irq_work_raise(struct irq_work *work) { - if (trace_ipi_send_cpumask_enabled() && arch_irq_work_has_interrupt()) - trace_ipi_send_cpumask(cpumask_of(smp_processor_id()), - _RET_IP_, - work->func); + if (trace_ipi_send_cpu_enabled() && arch_irq_work_has_interrupt()) + trace_ipi_send_cpu(smp_processor_id(), _RET_IP_, work->func); arch_irq_work_raise(); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b0a48cfc0a22..ad40755ddc11 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -96,6 +96,7 @@ #include "../../io_uring/io-wq.h" #include "../smpboot.h" +EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu); EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask); /* diff --git a/kernel/smp.c b/kernel/smp.c index 37e9613a0889..43f0796ecdb2 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -107,7 +107,7 @@ static __always_inline void send_call_function_single_ipi(int cpu, smp_call_func_t func) { if (call_function_single_prep_ipi(cpu)) { - trace_ipi_send_cpumask(cpumask_of(cpu), _RET_IP_, func); + trace_ipi_send_cpu(cpu, _RET_IP_, func); arch_send_call_function_single_ipi(cpu); } } @@ -346,7 +346,7 @@ void __smp_call_single_queue(int cpu, struct llist_node *node) * even if we haven't sent the smp_call IPI yet (e.g. the stopper * executes migration_cpu_stop() on the remote CPU). */ - if (trace_ipi_send_cpumask_enabled()) { + if (trace_ipi_send_cpu_enabled()) { call_single_data_t *csd; smp_call_func_t func; -- cgit From 5c3124975e15c1fadd5af1c61e4d627cf6d97ba2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Mar 2023 14:58:36 +0100 Subject: trace,smp: Trace all smp_function_call*() invocations (Ab)use the trace_ipi_send_cpu*() family to trace all smp_function_call*() invocations, not only those that result in an actual IPI. The queued entries log their callback function while the actual IPIs are traced on generic_smp_call_function_single_interrupt(). Signed-off-by: Peter Zijlstra (Intel) --- kernel/smp.c | 66 +++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 30 deletions(-) (limited to 'kernel/smp.c') diff --git a/kernel/smp.c b/kernel/smp.c index 43f0796ecdb2..ab3e5dad6cfe 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -104,18 +104,20 @@ void __init call_function_init(void) } static __always_inline void -send_call_function_single_ipi(int cpu, smp_call_func_t func) +send_call_function_single_ipi(int cpu) { if (call_function_single_prep_ipi(cpu)) { - trace_ipi_send_cpu(cpu, _RET_IP_, func); + trace_ipi_send_cpu(cpu, _RET_IP_, + generic_smp_call_function_single_interrupt); arch_send_call_function_single_ipi(cpu); } } static __always_inline void -send_call_function_ipi_mask(struct cpumask *mask, smp_call_func_t func) +send_call_function_ipi_mask(struct cpumask *mask) { - trace_ipi_send_cpumask(mask, _RET_IP_, func); + trace_ipi_send_cpumask(mask, _RET_IP_, + generic_smp_call_function_single_interrupt); arch_send_call_function_ipi_mask(mask); } @@ -316,25 +318,6 @@ static __always_inline void csd_unlock(struct __call_single_data *csd) smp_store_release(&csd->node.u_flags, 0); } -static __always_inline void -raw_smp_call_single_queue(int cpu, struct llist_node *node, smp_call_func_t func) -{ - /* - * The list addition should be visible to the target CPU when it pops - * the head of the list to pull the entry off it in the IPI handler - * because of normal cache coherency rules implied by the underlying - * llist ops. - * - * If IPIs can go out of order to the cache coherency protocol - * in an architecture, sufficient synchronisation should be added - * to arch code to make it appear to obey cache coherency WRT - * locking and barrier primitives. Generic code isn't really - * equipped to do the right thing... - */ - if (llist_add(node, &per_cpu(call_single_queue, cpu))) - send_call_function_single_ipi(cpu, func); -} - static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data); void __smp_call_single_queue(int cpu, struct llist_node *node) @@ -354,10 +337,23 @@ void __smp_call_single_queue(int cpu, struct llist_node *node) func = CSD_TYPE(csd) == CSD_TYPE_TTWU ? sched_ttwu_pending : csd->func; - raw_smp_call_single_queue(cpu, node, func); - } else { - raw_smp_call_single_queue(cpu, node, NULL); + trace_ipi_send_cpu(cpu, _RET_IP_, func); } + + /* + * The list addition should be visible to the target CPU when it pops + * the head of the list to pull the entry off it in the IPI handler + * because of normal cache coherency rules implied by the underlying + * llist ops. + * + * If IPIs can go out of order to the cache coherency protocol + * in an architecture, sufficient synchronisation should be added + * to arch code to make it appear to obey cache coherency WRT + * locking and barrier primitives. Generic code isn't really + * equipped to do the right thing... + */ + if (llist_add(node, &per_cpu(call_single_queue, cpu))) + send_call_function_single_ipi(cpu); } /* @@ -732,9 +728,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask, int cpu, last_cpu, this_cpu = smp_processor_id(); struct call_function_data *cfd; bool wait = scf_flags & SCF_WAIT; + int nr_cpus = 0, nr_queued = 0; bool run_remote = false; bool run_local = false; - int nr_cpus = 0; lockdep_assert_preemption_disabled(); @@ -776,8 +772,10 @@ static void smp_call_function_many_cond(const struct cpumask *mask, for_each_cpu(cpu, cfd->cpumask) { call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu); - if (cond_func && !cond_func(cpu, info)) + if (cond_func && !cond_func(cpu, info)) { + __cpumask_clear_cpu(cpu, cfd->cpumask); continue; + } csd_lock(csd); if (wait) @@ -793,17 +791,25 @@ static void smp_call_function_many_cond(const struct cpumask *mask, nr_cpus++; last_cpu = cpu; } + nr_queued++; } + /* + * Trace each smp_function_call_*() as an IPI, actual IPIs + * will be traced with func==generic_smp_call_function_single_ipi(). + */ + if (nr_queued) + trace_ipi_send_cpumask(cfd->cpumask, _RET_IP_, func); + /* * Choose the most efficient way to send an IPI. Note that the * number of CPUs might be zero due to concurrent changes to the * provided mask. */ if (nr_cpus == 1) - send_call_function_single_ipi(last_cpu, func); + send_call_function_single_ipi(last_cpu); else if (likely(nr_cpus > 1)) - send_call_function_ipi_mask(cfd->cpumask_ipi, func); + send_call_function_ipi_mask(cfd->cpumask_ipi); } if (run_local && (!cond_func || cond_func(this_cpu, info))) { -- cgit