From eae9f147a4b02e132187a2d88a403b9ccc28212a Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Mon, 13 Dec 2021 12:32:09 +0530 Subject: rcu: Remove unused rcu_state.boost Signed-off-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c5b45c2f68a1..109429e70a64 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1175,8 +1175,6 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) if (rnp->boost_kthread_task || !rcu_scheduler_fully_active) return; - rcu_state.boost = 1; - t = kthread_create(rcu_boost_kthread, (void *)rnp, "rcub/%d", rnp_index); if (WARN_ON_ONCE(IS_ERR(t))) -- cgit From 10c535787436d62ea28156a4b91365fd89b5a432 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Jan 2022 12:40:08 -0800 Subject: rcu: Don't deboost before reporting expedited quiescent state Currently rcu_preempt_deferred_qs_irqrestore() releases rnp->boost_mtx before reporting the expedited quiescent state. Under heavy real-time load, this can result in this function being preempted before the quiescent state is reported, which can in turn prevent the expedited grace period from completing. Tim Murray reports that the resulting expedited grace periods can take hundreds of milliseconds and even more than one second, when they should normally complete in less than a millisecond. This was fine given that there were no particular response-time constraints for synchronize_rcu_expedited(), as it was designed for throughput rather than latency. However, some users now need sub-100-millisecond response-time constratints. This patch therefore follows Neeraj's suggestion (seconded by Tim and by Uladzislau Rezki) of simply reversing the two operations. Reported-by: Tim Murray Reported-by: Joel Fernandes Reported-by: Neeraj Upadhyay Reviewed-by: Neeraj Upadhyay Reviewed-by: Uladzislau Rezki (Sony) Tested-by: Tim Murray Cc: Todd Kjos Cc: Sandeep Patil Cc: # 5.4.x Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_plugin.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 109429e70a64..02ac057ba3f8 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -556,16 +556,16 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } - /* Unboost if we were boosted. */ - if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) - rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex); - /* * If this was the last task on the expedited lists, * then we need to report up the rcu_node hierarchy. */ if (!empty_exp && empty_exp_now) rcu_report_exp_rnp(rnp, true); + + /* Unboost if we were boosted. */ + if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) + rt_mutex_futex_unlock(&rnp->boost_mtx.rtmutex); } else { local_irq_restore(flags); } -- cgit From c9515875850fefcc79492c5189fe8431e75ddec5 Mon Sep 17 00:00:00 2001 From: Zqiang Date: Tue, 25 Jan 2022 10:47:44 +0800 Subject: rcu: Add per-CPU rcuc task dumps to RCU CPU stall warnings When the rcutree.use_softirq kernel boot parameter is set to zero, all RCU_SOFTIRQ processing is carried out by the per-CPU rcuc kthreads. If these kthreads are being starved, quiescent states will not be reported, which in turn means that the grace period will not end, which can in turn trigger RCU CPU stall warnings. This commit therefore dumps stack traces of stalled CPUs' rcuc kthreads, which can help identify what is preventing those kthreads from running. Suggested-by: Ammar Faizi Reviewed-by: Ammar Faizi Signed-off-by: Zqiang Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 3 +++ kernel/rcu/tree.h | 1 + kernel/rcu/tree_plugin.h | 3 +++ kernel/rcu/tree_stall.h | 35 +++++++++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 9e4c5b281f00..bd9b2af247ab 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2850,10 +2850,12 @@ static void rcu_cpu_kthread(unsigned int cpu) { unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status); char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work); + unsigned long *j = this_cpu_ptr(&rcu_data.rcuc_activity); int spincnt; trace_rcu_utilization(TPS("Start CPU kthread@rcu_run")); for (spincnt = 0; spincnt < 10; spincnt++) { + WRITE_ONCE(*j, jiffies); local_bh_disable(); *statusp = RCU_KTHREAD_RUNNING; local_irq_disable(); @@ -2874,6 +2876,7 @@ static void rcu_cpu_kthread(unsigned int cpu) schedule_timeout_idle(2); trace_rcu_utilization(TPS("End CPU kthread@rcu_yield")); *statusp = RCU_KTHREAD_WAITING; + WRITE_ONCE(*j, jiffies); } static struct smp_hotplug_thread rcu_cpu_thread_spec = { diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index e9990945483f..b84cc5742c31 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -239,6 +239,7 @@ struct rcu_data { /* rcuc per-CPU kthread or NULL. */ unsigned int rcu_cpu_kthread_status; char rcu_cpu_has_work; + unsigned long rcuc_activity; /* 7) Diagnostic data, including RCU CPU stall warnings. */ unsigned int softirq_snap; /* Snapshot of softirq activity. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 02ac057ba3f8..8167cab1bffc 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -996,12 +996,15 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) */ static void rcu_cpu_kthread_setup(unsigned int cpu) { + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); #ifdef CONFIG_RCU_BOOST struct sched_param sp; sp.sched_priority = kthread_prio; sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); #endif /* #ifdef CONFIG_RCU_BOOST */ + + WRITE_ONCE(rdp->rcuc_activity, jiffies); } #ifdef CONFIG_RCU_BOOST diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 21bebf7c9030..0c5d8516516a 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -379,6 +379,15 @@ static bool rcu_is_gp_kthread_starving(unsigned long *jp) return j > 2 * HZ; } +static bool rcu_is_rcuc_kthread_starving(struct rcu_data *rdp, unsigned long *jp) +{ + unsigned long j = jiffies - READ_ONCE(rdp->rcuc_activity); + + if (jp) + *jp = j; + return j > 2 * HZ; +} + /* * Print out diagnostic information for the specified stalled CPU. * @@ -430,6 +439,29 @@ static void print_cpu_stall_info(int cpu) falsepositive ? " (false positive?)" : ""); } +static void rcuc_kthread_dump(struct rcu_data *rdp) +{ + int cpu; + unsigned long j; + struct task_struct *rcuc; + + rcuc = rdp->rcu_cpu_kthread_task; + if (!rcuc) + return; + + cpu = task_cpu(rcuc); + if (cpu_is_offline(cpu) || idle_cpu(cpu)) + return; + + if (!rcu_is_rcuc_kthread_starving(rdp, &j)) + return; + + pr_err("%s kthread starved for %ld jiffies\n", rcuc->comm, j); + sched_show_task(rcuc); + if (!trigger_single_cpu_backtrace(cpu)) + dump_cpu_task(cpu); +} + /* Complain about starvation of grace-period kthread. */ static void rcu_check_gp_kthread_starvation(void) { @@ -601,6 +633,9 @@ static void print_cpu_stall(unsigned long gps) rcu_check_gp_kthread_expired_fqs_timer(); rcu_check_gp_kthread_starvation(); + if (!use_softirq) + rcuc_kthread_dump(rdp); + rcu_dump_cpu_stacks(); raw_spin_lock_irqsave_rcu_node(rnp, flags); -- cgit From 5ae0f1b58b28b53f4ab3708ef9337a2665e79664 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 10 Dec 2021 13:44:17 -0800 Subject: rcu: Create and use an rcu_rdp_cpu_online() The pattern "rdp->grpmask & rcu_rnp_online_cpus(rnp)" occurs frequently in RCU code in order to determine whether rdp->cpu is online from an RCU perspective. This commit therefore creates an rcu_rdp_cpu_online() function to replace it. [ paulmck: Apply kernel test robot unused-variable feedback. ] Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 21 +++++++++++++-------- kernel/rcu/tree_plugin.h | 6 ++---- 2 files changed, 15 insertions(+), 12 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2d70b91e3fbc..1d3507d563db 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -222,6 +222,16 @@ static unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp) return READ_ONCE(rnp->qsmaskinitnext); } +/* + * Is the CPU corresponding to the specified rcu_data structure online + * from RCU's perspective? This perspective is given by that structure's + * ->qsmaskinitnext field rather than by the global cpu_online_mask. + */ +static bool rcu_rdp_cpu_online(struct rcu_data *rdp) +{ + return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode)); +} + /* * Return true if an RCU grace period is in progress. The READ_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -1168,14 +1178,12 @@ void rcu_request_urgent_qs_task(struct task_struct *t) bool rcu_lockdep_current_cpu_online(void) { struct rcu_data *rdp; - struct rcu_node *rnp; bool ret = false; if (in_nmi() || !rcu_scheduler_fully_active) return true; preempt_disable_notrace(); rdp = this_cpu_ptr(&rcu_data); - rnp = rdp->mynode; /* * Strictly, we care here about the case where the current CPU is * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask @@ -1183,8 +1191,7 @@ bool rcu_lockdep_current_cpu_online(void) * false positive if it's held by some *other* CPU, but that's * OK because that just means a false *negative* on the warning. */ - if (rdp->grpmask & rcu_rnp_online_cpus(rnp) || - arch_spin_is_locked(&rcu_state.ofl_lock)) + if (rcu_rdp_cpu_online(rdp) || arch_spin_is_locked(&rcu_state.ofl_lock)) ret = true; preempt_enable_notrace(); return ret; @@ -1269,8 +1276,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * For more detail, please refer to the "Hotplug CPU" section * of RCU's Requirements documentation. */ - if (WARN_ON_ONCE(!(rdp->grpmask & rcu_rnp_online_cpus(rnp)))) { - bool onl; + if (WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp))) { struct rcu_node *rnp1; pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n", @@ -1279,9 +1285,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) for (rnp1 = rnp; rnp1; rnp1 = rnp1->parent) pr_info("%s: %d:%d ->qsmask %#lx ->qsmaskinit %#lx ->qsmaskinitnext %#lx ->rcu_gp_init_mask %#lx\n", __func__, rnp1->grplo, rnp1->grphi, rnp1->qsmask, rnp1->qsmaskinit, rnp1->qsmaskinitnext, rnp1->rcu_gp_init_mask); - onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp)); pr_info("%s %d: %c online: %ld(%d) offline: %ld(%d)\n", - __func__, rdp->cpu, ".o"[onl], + __func__, rdp->cpu, ".o"[rcu_rdp_cpu_online(rdp)], (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags, (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags); return 1; /* Break things loose after complaining. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c5b45c2f68a1..d3db2168598e 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -330,7 +330,7 @@ void rcu_note_context_switch(bool preempt) * then queue the task as required based on the states * of any ongoing and expedited grace periods. */ - WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0); + WARN_ON_ONCE(!rcu_rdp_cpu_online(rdp)); WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); trace_rcu_preempt_task(rcu_state.name, t->pid, @@ -773,7 +773,6 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) int cpu; int i; struct list_head *lhp; - bool onl; struct rcu_data *rdp; struct rcu_node *rnp1; @@ -797,9 +796,8 @@ dump_blkd_tasks(struct rcu_node *rnp, int ncheck) pr_cont("\n"); for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) { rdp = per_cpu_ptr(&rcu_data, cpu); - onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp)); pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n", - cpu, ".o"[onl], + cpu, ".o"[rcu_rdp_cpu_online(rdp)], (long)rdp->rcu_onl_gp_seq, rdp->rcu_onl_gp_flags, (long)rdp->rcu_ofl_gp_seq, rdp->rcu_ofl_gp_flags); } -- cgit From 218b957a6959a2fb5b3967fc824072bb89ac2611 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 8 Dec 2021 23:41:53 +0000 Subject: rcu: Add mutex for rcu boost kthread spawning and affinity setting As we handle parallel CPU bringup, we will need to take care to avoid spawning multiple boost threads, or race conditions when setting their affinity. Spotted by Paul McKenney. Signed-off-by: David Woodhouse Reviewed-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcu/tree.c | 1 + kernel/rcu/tree.h | 3 +++ kernel/rcu/tree_plugin.h | 10 ++++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index a4c25a6283b0..d1d1a8c51223 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4570,6 +4570,7 @@ static void __init rcu_init_one(void) init_waitqueue_head(&rnp->exp_wq[2]); init_waitqueue_head(&rnp->exp_wq[3]); spin_lock_init(&rnp->exp_lock); + mutex_init(&rnp->boost_kthread_mutex); } } diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 486fc901bd08..3b8b60de07c3 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -110,6 +110,9 @@ struct rcu_node { /* side effect, not as a lock. */ unsigned long boost_time; /* When to start boosting (jiffies). */ + struct mutex boost_kthread_mutex; + /* Exclusion for thread spawning and affinity */ + /* manipulation. */ struct task_struct *boost_kthread_task; /* kthread that takes care of priority */ /* boosting for this rcu_node structure. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c5b45c2f68a1..07845dcd33c5 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1172,15 +1172,16 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) struct sched_param sp; struct task_struct *t; + mutex_lock(&rnp->boost_kthread_mutex); if (rnp->boost_kthread_task || !rcu_scheduler_fully_active) - return; + goto out; rcu_state.boost = 1; t = kthread_create(rcu_boost_kthread, (void *)rnp, "rcub/%d", rnp_index); if (WARN_ON_ONCE(IS_ERR(t))) - return; + goto out; raw_spin_lock_irqsave_rcu_node(rnp, flags); rnp->boost_kthread_task = t; @@ -1188,6 +1189,9 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) sp.sched_priority = kthread_prio; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ + + out: + mutex_unlock(&rnp->boost_kthread_mutex); } /* @@ -1210,6 +1214,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) return; if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) return; + mutex_lock(&rnp->boost_kthread_mutex); for_each_leaf_node_possible_cpu(rnp, cpu) if ((mask & leaf_node_cpu_bit(rnp, cpu)) && cpu != outgoingcpu) @@ -1218,6 +1223,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) if (cpumask_weight(cm) == 0) cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU)); set_cpus_allowed_ptr(t, cm); + mutex_unlock(&rnp->boost_kthread_mutex); free_cpumask_var(cm); } -- cgit From 6a2c1d450a6a328027280a854019c55de989e14e Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Sun, 23 Jan 2022 10:38:53 -0800 Subject: rcu: Replace cpumask_weight with cpumask_empty where appropriate In some places, RCU code calls cpumask_weight() to check if any bit of a given cpumask is set. We can do it more efficiently with cpumask_empty() because cpumask_empty() stops traversing the cpumask as soon as it finds first set bit, while cpumask_weight() counts all bits unconditionally. Signed-off-by: Yury Norov Acked-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- kernel/rcu/tree_nocb.h | 4 ++-- kernel/rcu/tree_plugin.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/rcu/tree_plugin.h') diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index eeafb546a7a0..f83c7b1d6110 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1169,7 +1169,7 @@ void __init rcu_init_nohz(void) struct rcu_data *rdp; #if defined(CONFIG_NO_HZ_FULL) - if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask)) + if (tick_nohz_full_running && !cpumask_empty(tick_nohz_full_mask)) need_rcu_nocb_mask = true; #endif /* #if defined(CONFIG_NO_HZ_FULL) */ @@ -1348,7 +1348,7 @@ static void __init rcu_organize_nocb_kthreads(void) */ void rcu_bind_current_to_nocb(void) { - if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask)) + if (cpumask_available(rcu_nocb_mask) && !cpumask_empty(rcu_nocb_mask)) WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask)); } EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 07845dcd33c5..efd0c87d2ffa 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1220,7 +1220,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) cpu != outgoingcpu) cpumask_set_cpu(cpu, cm); cpumask_and(cm, cm, housekeeping_cpumask(HK_FLAG_RCU)); - if (cpumask_weight(cm) == 0) + if (cpumask_empty(cm)) cpumask_copy(cm, housekeeping_cpumask(HK_FLAG_RCU)); set_cpus_allowed_ptr(t, cm); mutex_unlock(&rnp->boost_kthread_mutex); -- cgit