diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpu.c | 4 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 2 | ||||
-rw-r--r-- | kernel/locking/locktorture.c | 14 | ||||
-rw-r--r-- | kernel/rcu/rcutorture.c | 103 | ||||
-rw-r--r-- | kernel/rcu/tiny.c | 38 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 181 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 35 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 123 |
8 files changed, 300 insertions, 200 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 94bbe4695232..9c9c9fab16cc 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -398,7 +398,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. */ - smpboot_unpark_threads(cpu); cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); goto out_release; } @@ -463,6 +462,7 @@ static int smpboot_thread_call(struct notifier_block *nfb, switch (action & ~CPU_TASKS_FROZEN) { + case CPU_DOWN_FAILED: case CPU_ONLINE: smpboot_unpark_threads(cpu); break; @@ -479,7 +479,7 @@ static struct notifier_block smpboot_thread_notifier = { .priority = CPU_PRI_SMPBOOT, }; -void __cpuinit smpboot_thread_init(void) +void smpboot_thread_init(void) { register_cpu_notifier(&smpboot_thread_notifier); } diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 232f00f273cb..17fcb73c4a50 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -141,7 +141,7 @@ int perf_output_begin(struct perf_output_handle *handle, perf_output_get_handle(handle); do { - tail = ACCESS_ONCE(rb->user_page->data_tail); + tail = READ_ONCE_CTRL(rb->user_page->data_tail); offset = head = local_read(&rb->head); if (!rb->overwrite && unlikely(CIRC_SPACE(head, tail, perf_data_size(rb)) < size)) diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index ec8cce259779..32244186f1f2 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -122,12 +122,12 @@ static int torture_lock_busted_write_lock(void) static void torture_lock_busted_write_delay(struct torture_random_state *trsp) { - const unsigned long longdelay_us = 100; + const unsigned long longdelay_ms = 100; /* We want a long delay occasionally to force massive contention. */ if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * 2000 * longdelay_us))) - mdelay(longdelay_us); + (cxt.nrealwriters_stress * 2000 * longdelay_ms))) + mdelay(longdelay_ms); #ifdef CONFIG_PREEMPT if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) preempt_schedule(); /* Allow test to be preempted. */ @@ -160,14 +160,14 @@ static int torture_spin_lock_write_lock(void) __acquires(torture_spinlock) static void torture_spin_lock_write_delay(struct torture_random_state *trsp) { const unsigned long shortdelay_us = 2; - const unsigned long longdelay_us = 100; + const unsigned long longdelay_ms = 100; /* We want a short delay mostly to emulate likely code, and * we want a long delay occasionally to force massive contention. */ if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * 2000 * longdelay_us))) - mdelay(longdelay_us); + (cxt.nrealwriters_stress * 2000 * longdelay_ms))) + mdelay(longdelay_ms); if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2 * shortdelay_us))) udelay(shortdelay_us); @@ -309,7 +309,7 @@ static int torture_rwlock_read_lock_irq(void) __acquires(torture_rwlock) static void torture_rwlock_read_unlock_irq(void) __releases(torture_rwlock) { - write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags); + read_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags); } static struct lock_torture_ops rw_lock_irq_ops = { diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index a67ef6ff86b0..59e32684c23b 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -241,6 +241,7 @@ rcu_torture_free(struct rcu_torture *p) struct rcu_torture_ops { int ttype; void (*init)(void); + void (*cleanup)(void); int (*readlock)(void); void (*read_delay)(struct torture_random_state *rrsp); void (*readunlock)(int idx); @@ -477,10 +478,12 @@ static struct rcu_torture_ops rcu_busted_ops = { */ DEFINE_STATIC_SRCU(srcu_ctl); +static struct srcu_struct srcu_ctld; +static struct srcu_struct *srcu_ctlp = &srcu_ctl; -static int srcu_torture_read_lock(void) __acquires(&srcu_ctl) +static int srcu_torture_read_lock(void) __acquires(srcu_ctlp) { - return srcu_read_lock(&srcu_ctl); + return srcu_read_lock(srcu_ctlp); } static void srcu_read_delay(struct torture_random_state *rrsp) @@ -499,49 +502,49 @@ static void srcu_read_delay(struct torture_random_state *rrsp) rcu_read_delay(rrsp); } -static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) +static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp) { - srcu_read_unlock(&srcu_ctl, idx); + srcu_read_unlock(srcu_ctlp, idx); } static unsigned long srcu_torture_completed(void) { - return srcu_batches_completed(&srcu_ctl); + return srcu_batches_completed(srcu_ctlp); } static void srcu_torture_deferred_free(struct rcu_torture *rp) { - call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb); + call_srcu(srcu_ctlp, &rp->rtort_rcu, rcu_torture_cb); } static void srcu_torture_synchronize(void) { - synchronize_srcu(&srcu_ctl); + synchronize_srcu(srcu_ctlp); } static void srcu_torture_call(struct rcu_head *head, void (*func)(struct rcu_head *head)) { - call_srcu(&srcu_ctl, head, func); + call_srcu(srcu_ctlp, head, func); } static void srcu_torture_barrier(void) { - srcu_barrier(&srcu_ctl); + srcu_barrier(srcu_ctlp); } static void srcu_torture_stats(void) { int cpu; - int idx = srcu_ctl.completed & 0x1; + int idx = srcu_ctlp->completed & 0x1; pr_alert("%s%s per-CPU(idx=%d):", torture_type, TORTURE_FLAG, idx); for_each_possible_cpu(cpu) { long c0, c1; - c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx]; - c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]; + c0 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[!idx]; + c1 = (long)per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu)->c[idx]; pr_cont(" %d(%ld,%ld)", cpu, c0, c1); } pr_cont("\n"); @@ -549,7 +552,7 @@ static void srcu_torture_stats(void) static void srcu_torture_synchronize_expedited(void) { - synchronize_srcu_expedited(&srcu_ctl); + synchronize_srcu_expedited(srcu_ctlp); } static struct rcu_torture_ops srcu_ops = { @@ -569,6 +572,38 @@ static struct rcu_torture_ops srcu_ops = { .name = "srcu" }; +static void srcu_torture_init(void) +{ + rcu_sync_torture_init(); + WARN_ON(init_srcu_struct(&srcu_ctld)); + srcu_ctlp = &srcu_ctld; +} + +static void srcu_torture_cleanup(void) +{ + cleanup_srcu_struct(&srcu_ctld); + srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */ +} + +/* As above, but dynamically allocated. */ +static struct rcu_torture_ops srcud_ops = { + .ttype = SRCU_FLAVOR, + .init = srcu_torture_init, + .cleanup = srcu_torture_cleanup, + .readlock = srcu_torture_read_lock, + .read_delay = srcu_read_delay, + .readunlock = srcu_torture_read_unlock, + .started = NULL, + .completed = srcu_torture_completed, + .deferred_free = srcu_torture_deferred_free, + .sync = srcu_torture_synchronize, + .exp_sync = srcu_torture_synchronize_expedited, + .call = srcu_torture_call, + .cb_barrier = srcu_torture_barrier, + .stats = srcu_torture_stats, + .name = "srcud" +}; + /* * Definitions for sched torture testing. */ @@ -672,8 +707,8 @@ static void rcu_torture_boost_cb(struct rcu_head *head) struct rcu_boost_inflight *rbip = container_of(head, struct rcu_boost_inflight, rcu); - smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */ - rbip->inflight = 0; + /* Ensure RCU-core accesses precede clearing ->inflight */ + smp_store_release(&rbip->inflight, 0); } static int rcu_torture_boost(void *arg) @@ -710,9 +745,9 @@ static int rcu_torture_boost(void *arg) call_rcu_time = jiffies; while (ULONG_CMP_LT(jiffies, endtime)) { /* If we don't have a callback in flight, post one. */ - if (!rbi.inflight) { - smp_mb(); /* RCU core before ->inflight = 1. */ - rbi.inflight = 1; + if (!smp_load_acquire(&rbi.inflight)) { + /* RCU core before ->inflight = 1. */ + smp_store_release(&rbi.inflight, 1); call_rcu(&rbi.rcu, rcu_torture_boost_cb); if (jiffies - call_rcu_time > test_boost_duration * HZ - HZ / 2) { @@ -751,11 +786,10 @@ checkwait: stutter_wait("rcu_torture_boost"); } while (!torture_must_stop()); /* Clean up and exit. */ - while (!kthread_should_stop() || rbi.inflight) { + while (!kthread_should_stop() || smp_load_acquire(&rbi.inflight)) { torture_shutdown_absorb("rcu_torture_boost"); schedule_timeout_uninterruptible(1); } - smp_mb(); /* order accesses to ->inflight before stack-frame death. */ destroy_rcu_head_on_stack(&rbi.rcu); torture_kthread_stopping("rcu_torture_boost"); return 0; @@ -1054,7 +1088,7 @@ static void rcu_torture_timer(unsigned long unused) p = rcu_dereference_check(rcu_torture_current, rcu_read_lock_bh_held() || rcu_read_lock_sched_held() || - srcu_read_lock_held(&srcu_ctl)); + srcu_read_lock_held(srcu_ctlp)); if (p == NULL) { /* Leave because rcu_torture_writer is not yet underway */ cur_ops->readunlock(idx); @@ -1128,7 +1162,7 @@ rcu_torture_reader(void *arg) p = rcu_dereference_check(rcu_torture_current, rcu_read_lock_bh_held() || rcu_read_lock_sched_held() || - srcu_read_lock_held(&srcu_ctl)); + srcu_read_lock_held(srcu_ctlp)); if (p == NULL) { /* Wait for rcu_torture_writer to get underway */ cur_ops->readunlock(idx); @@ -1413,12 +1447,15 @@ static int rcu_torture_barrier_cbs(void *arg) do { wait_event(barrier_cbs_wq[myid], (newphase = - READ_ONCE(barrier_phase)) != lastphase || + smp_load_acquire(&barrier_phase)) != lastphase || torture_must_stop()); lastphase = newphase; - smp_mb(); /* ensure barrier_phase load before ->call(). */ if (torture_must_stop()) break; + /* + * The above smp_load_acquire() ensures barrier_phase load + * is ordered before the folloiwng ->call(). + */ cur_ops->call(&rcu, rcu_torture_barrier_cbf); if (atomic_dec_and_test(&barrier_cbs_count)) wake_up(&barrier_wq); @@ -1439,8 +1476,8 @@ static int rcu_torture_barrier(void *arg) do { atomic_set(&barrier_cbs_invoked, 0); atomic_set(&barrier_cbs_count, n_barrier_cbs); - smp_mb(); /* Ensure barrier_phase after prior assignments. */ - barrier_phase = !barrier_phase; + /* Ensure barrier_phase ordered after prior assignments. */ + smp_store_release(&barrier_phase, !barrier_phase); for (i = 0; i < n_barrier_cbs; i++) wake_up(&barrier_cbs_wq[i]); wait_event(barrier_wq, @@ -1588,10 +1625,14 @@ rcu_torture_cleanup(void) rcutorture_booster_cleanup(i); } - /* Wait for all RCU callbacks to fire. */ - + /* + * Wait for all RCU callbacks to fire, then do flavor-specific + * cleanup operations. + */ if (cur_ops->cb_barrier != NULL) cur_ops->cb_barrier(); + if (cur_ops->cleanup != NULL) + cur_ops->cleanup(); rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ @@ -1668,8 +1709,8 @@ rcu_torture_init(void) int cpu; int firsterr = 0; static struct rcu_torture_ops *torture_ops[] = { - &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, - RCUTORTURE_TASKS_OPS + &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, + &sched_ops, RCUTORTURE_TASKS_OPS }; if (!torture_init_begin(torture_type, verbose, &torture_runnable)) @@ -1701,7 +1742,7 @@ rcu_torture_init(void) if (nreaders >= 0) { nrealreaders = nreaders; } else { - nrealreaders = num_online_cpus() - 1; + nrealreaders = num_online_cpus() - 2 - nreaders; if (nrealreaders <= 0) nrealreaders = 1; } diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 069742d61c68..591af0cb7b9f 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -49,39 +49,6 @@ static void __call_rcu(struct rcu_head *head, #include "tiny_plugin.h" -/* - * Enter idle, which is an extended quiescent state if we have fully - * entered that mode. - */ -void rcu_idle_enter(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_idle_enter); - -/* - * Exit an interrupt handler towards idle. - */ -void rcu_irq_exit(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_irq_exit); - -/* - * Exit idle, so that we are no longer in an extended quiescent state. - */ -void rcu_idle_exit(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_idle_exit); - -/* - * Enter an interrupt handler, moving away from idle. - */ -void rcu_irq_enter(void) -{ -} -EXPORT_SYMBOL_GPL(rcu_irq_enter); - #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) /* @@ -170,6 +137,11 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) /* Move the ready-to-invoke callbacks to a local list. */ local_irq_save(flags); + if (rcp->donetail == &rcp->rcucblist) { + /* No callbacks ready, so just leave. */ + local_irq_restore(flags); + return; + } RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); list = rcp->rcucblist; rcp->rcucblist = *rcp->donetail; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 0628df155970..add042926a66 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -91,7 +91,7 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ DEFINE_RCU_TPS(sname) \ -DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ +static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ struct rcu_state sname##_state = { \ .level = { &sname##_state.node[0] }, \ .rda = &sname##_data, \ @@ -110,11 +110,18 @@ struct rcu_state sname##_state = { \ RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); -static struct rcu_state *rcu_state_p; +static struct rcu_state *const rcu_state_p; +static struct rcu_data __percpu *const rcu_data_p; LIST_HEAD(rcu_struct_flavors); -/* Increase (but not decrease) the CONFIG_RCU_FANOUT_LEAF at boot time. */ -static int rcu_fanout_leaf = CONFIG_RCU_FANOUT_LEAF; +/* Dump rcu_node combining tree at boot to verify correct setup. */ +static bool dump_tree; +module_param(dump_tree, bool, 0444); +/* Control rcu_node-tree auto-balancing at boot time. */ +static bool rcu_fanout_exact; +module_param(rcu_fanout_exact, bool, 0444); +/* Increase (but not decrease) the RCU_FANOUT_LEAF at boot time. */ +static int rcu_fanout_leaf = RCU_FANOUT_LEAF; module_param(rcu_fanout_leaf, int, 0444); int rcu_num_lvls __read_mostly = RCU_NUM_LVLS; static int num_rcu_lvl[] = { /* Number of rcu_nodes at specified level. */ @@ -159,17 +166,46 @@ static void invoke_rcu_core(void); static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); /* rcuc/rcub kthread realtime priority */ +#ifdef CONFIG_RCU_KTHREAD_PRIO static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO; +#else /* #ifdef CONFIG_RCU_KTHREAD_PRIO */ +static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0; +#endif /* #else #ifdef CONFIG_RCU_KTHREAD_PRIO */ module_param(kthread_prio, int, 0644); /* Delay in jiffies for grace-period initialization delays, debug only. */ + +#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT +static int gp_preinit_delay = CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY; +module_param(gp_preinit_delay, int, 0644); +#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */ +static const int gp_preinit_delay; +#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */ + #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY; module_param(gp_init_delay, int, 0644); #else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ static const int gp_init_delay; #endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */ -#define PER_RCU_NODE_PERIOD 10 /* Number of grace periods between delays. */ + +#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP +static int gp_cleanup_delay = CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY; +module_param(gp_cleanup_delay, int, 0644); +#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */ +static const int gp_cleanup_delay; +#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */ + +/* + * Number of grace periods between delays, normalized by the duration of + * the delay. The longer the the delay, the more the grace periods between + * each delay. The reason for this normalization is that it means that, + * for non-zero delays, the overall slowdown of grace periods is constant + * regardless of the duration of the delay. This arrangement balances + * the need for long delays to increase some race probabilities with the + * need for fast grace periods to increase other race probabilities. + */ +#define PER_RCU_NODE_PERIOD 3 /* Number of grace periods between delays. */ /* * Track the rcutorture test sequence number and the update version @@ -585,7 +621,8 @@ static void rcu_eqs_enter_common(long long oldval, bool user) struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting); - if (!user && !is_idle_task(current)) { + if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); @@ -604,7 +641,8 @@ static void rcu_eqs_enter_common(long long oldval, bool user) smp_mb__before_atomic(); /* See above. */ atomic_inc(&rdtp->dynticks); smp_mb__after_atomic(); /* Force ordering with next sojourn. */ - WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + atomic_read(&rdtp->dynticks) & 0x1); rcu_dynticks_task_enter(); /* @@ -630,7 +668,8 @@ static void rcu_eqs_enter(bool user) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; - WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + (oldval & DYNTICK_TASK_NEST_MASK) == 0); if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) { rdtp->dynticks_nesting = 0; rcu_eqs_enter_common(oldval, user); @@ -703,7 +742,8 @@ void rcu_irq_exit(void) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting--; - WARN_ON_ONCE(rdtp->dynticks_nesting < 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + rdtp->dynticks_nesting < 0); if (rdtp->dynticks_nesting) trace_rcu_dyntick(TPS("--="), oldval, rdtp->dynticks_nesting); else @@ -728,10 +768,12 @@ static void rcu_eqs_exit_common(long long oldval, int user) atomic_inc(&rdtp->dynticks); /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ smp_mb__after_atomic(); /* See above. */ - WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !(atomic_read(&rdtp->dynticks) & 0x1)); rcu_cleanup_after_idle(); trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); - if (!user && !is_idle_task(current)) { + if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + !user && !is_idle_task(current)) { struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); @@ -755,7 +797,7 @@ static void rcu_eqs_exit(bool user) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; - WARN_ON_ONCE(oldval < 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); if (oldval & DYNTICK_TASK_NEST_MASK) { rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE; } else { @@ -828,7 +870,8 @@ void rcu_irq_enter(void) rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting++; - WARN_ON_ONCE(rdtp->dynticks_nesting == 0); + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && + rdtp->dynticks_nesting == 0); if (oldval) trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); else @@ -1135,8 +1178,9 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp) j = jiffies; gpa = READ_ONCE(rsp->gp_activity); if (j - gpa > 2 * HZ) - pr_err("%s kthread starved for %ld jiffies!\n", - rsp->name, j - gpa); + pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x\n", + rsp->name, j - gpa, + rsp->gpnum, rsp->completed, rsp->gp_flags); } /* @@ -1732,6 +1776,13 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) rcu_gp_kthread_wake(rsp); } +static void rcu_gp_slow(struct rcu_state *rsp, int delay) +{ + if (delay > 0 && + !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay))) + schedule_timeout_uninterruptible(delay); +} + /* * Initialize a new grace period. Return 0 if no grace period required. */ @@ -1774,6 +1825,7 @@ static int rcu_gp_init(struct rcu_state *rsp) * will handle subsequent offline CPUs. */ rcu_for_each_leaf_node(rsp, rnp) { + rcu_gp_slow(rsp, gp_preinit_delay); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); if (rnp->qsmaskinit == rnp->qsmaskinitnext && @@ -1830,6 +1882,7 @@ static int rcu_gp_init(struct rcu_state *rsp) * process finishes, because this kthread handles both. */ rcu_for_each_node_breadth_first(rsp, rnp) { + rcu_gp_slow(rsp, gp_init_delay); raw_spin_lock_irq(&rnp->lock); smp_mb__after_unlock_lock(); rdp = this_cpu_ptr(rsp->rda); @@ -1847,9 +1900,6 @@ static int rcu_gp_init(struct rcu_state *rsp) raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); WRITE_ONCE(rsp->gp_activity, jiffies); - if (gp_init_delay > 0 && - !(rsp->gpnum % (rcu_num_nodes * PER_RCU_NODE_PERIOD))) - schedule_timeout_uninterruptible(gp_init_delay); } return 1; @@ -1944,6 +1994,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) raw_spin_unlock_irq(&rnp->lock); cond_resched_rcu_qs(); WRITE_ONCE(rsp->gp_activity, jiffies); + rcu_gp_slow(rsp, gp_cleanup_delay); } rnp = rcu_get_root(rsp); raw_spin_lock_irq(&rnp->lock); @@ -2138,6 +2189,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) __releases(rcu_get_root(rsp)->lock) { WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); + WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); rcu_gp_kthread_wake(rsp); } @@ -2335,8 +2387,6 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) rcu_report_qs_rdp(rdp->cpu, rsp, rdp); } -#ifdef CONFIG_HOTPLUG_CPU - /* * Send the specified CPU's RCU callbacks to the orphanage. The * specified CPU must be offline, and the caller must hold the @@ -2347,7 +2397,7 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { /* No-CBs CPUs do not have orphanable callbacks. */ - if (rcu_is_nocb_cpu(rdp->cpu)) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu)) return; /* @@ -2406,7 +2456,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); /* No-CBs CPUs are handled specially. */ - if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || + rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) return; /* Do the accounting first. */ @@ -2453,6 +2504,9 @@ static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda)); RCU_TRACE(struct rcu_node *rnp = rdp->mynode); + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return; + RCU_TRACE(mask = rdp->grpmask); trace_rcu_grace_period(rsp->name, rnp->gpnum + 1 - !!(rnp->qsmask & mask), @@ -2481,7 +2535,8 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) long mask; struct rcu_node *rnp = rnp_leaf; - if (rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || + rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) return; for (;;) { mask = rnp->grpmask; @@ -2512,6 +2567,9 @@ static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return; + /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ mask = rdp->grpmask; raw_spin_lock_irqsave(&rnp->lock, flags); @@ -2533,6 +2591,9 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return; + /* Adjust any no-longer-needed kthreads. */ rcu_boost_kthread_setaffinity(rnp, -1); @@ -2547,26 +2608,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) cpu, rdp->qlen, rdp->nxtlist); } -#else /* #ifdef CONFIG_HOTPLUG_CPU */ - -static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) -{ -} - -static void __maybe_unused rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) -{ -} - -static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) -{ -} - -static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) -{ -} - -#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ - /* * Invoke any RCU callbacks that have made it to the end of their grace * period. Thottle as specified by rdp->blimit. @@ -2731,10 +2772,6 @@ static void force_qs_rnp(struct rcu_state *rsp, mask = 0; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - if (!rcu_gp_in_progress(rsp)) { - raw_spin_unlock_irqrestore(&rnp->lock, flags); - return; - } if (rnp->qsmask == 0) { if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p || @@ -2764,8 +2801,6 @@ static void force_qs_rnp(struct rcu_state *rsp, bit = 1; for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { if ((rnp->qsmask & bit) != 0) { - if ((rnp->qsmaskinit & bit) == 0) - *isidle = false; /* Pending hotplug. */ if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) mask |= bit; } @@ -3287,7 +3322,7 @@ void synchronize_sched_expedited(void) if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start), (ulong)atomic_long_read(&rsp->expedited_done) + ULONG_MAX / 8)) { - synchronize_sched(); + wait_rcu_gp(call_rcu_sched); atomic_long_inc(&rsp->expedited_wrap); return; } @@ -3493,7 +3528,7 @@ static int rcu_pending(void) * non-NULL, store an indication of whether all callbacks are lazy. * (If there are no callbacks, all of them are deemed to be lazy.) */ -static int __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy) +static bool __maybe_unused rcu_cpu_has_callbacks(bool *all_lazy) { bool al = true; bool hc = false; @@ -3780,7 +3815,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */ rdp->completed = rnp->completed; rdp->passed_quiesce = false; - rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr); + rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu); rdp->qs_pending = false; trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl")); raw_spin_unlock_irqrestore(&rnp->lock, flags); @@ -3924,16 +3959,16 @@ void rcu_scheduler_starting(void) /* * Compute the per-level fanout, either using the exact fanout specified - * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. + * or balancing the tree, depending on the rcu_fanout_exact boot parameter. */ static void __init rcu_init_levelspread(struct rcu_state *rsp) { int i; - if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) { + if (rcu_fanout_exact) { rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; for (i = rcu_num_lvls - 2; i >= 0; i--) - rsp->levelspread[i] = CONFIG_RCU_FANOUT; + rsp->levelspread[i] = RCU_FANOUT; } else { int ccur; int cprv; @@ -3971,9 +4006,9 @@ static void __init rcu_init_one(struct rcu_state *rsp, BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */ - /* Silence gcc 4.8 warning about array index out of range. */ - if (rcu_num_lvls > RCU_NUM_LVLS) - panic("rcu_init_one: rcu_num_lvls overflow"); + /* Silence gcc 4.8 false positive about array index out of range. */ + if (rcu_num_lvls <= 0 || rcu_num_lvls > RCU_NUM_LVLS) + panic("rcu_init_one: rcu_num_lvls out of range"); /* Initialize the level-tracking arrays. */ @@ -4059,7 +4094,7 @@ static void __init rcu_init_geometry(void) jiffies_till_next_fqs = d; /* If the compile-time values are accurate, just leave. */ - if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF && + if (rcu_fanout_leaf == RCU_FANOUT_LEAF && nr_cpu_ids == NR_CPUS) return; pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n", @@ -4073,7 +4108,7 @@ static void __init rcu_init_geometry(void) rcu_capacity[0] = 1; rcu_capacity[1] = rcu_fanout_leaf; for (i = 2; i <= MAX_RCU_LVLS; i++) - rcu_capacity[i] = rcu_capacity[i - 1] * CONFIG_RCU_FANOUT; + rcu_capacity[i] = rcu_capacity[i - 1] * RCU_FANOUT; /* * The boot-time rcu_fanout_leaf parameter is only permitted @@ -4083,7 +4118,7 @@ static void __init rcu_init_geometry(void) * the configured number of CPUs. Complain and fall back to the * compile-time values if these limits are exceeded. */ - if (rcu_fanout_leaf < CONFIG_RCU_FANOUT_LEAF || + if (rcu_fanout_leaf < RCU_FANOUT_LEAF || rcu_fanout_leaf > sizeof(unsigned long) * 8 || n > rcu_capacity[MAX_RCU_LVLS]) { WARN_ON(1); @@ -4109,6 +4144,28 @@ static void __init rcu_init_geometry(void) rcu_num_nodes -= n; } +/* + * Dump out the structure of the rcu_node combining tree associated + * with the rcu_state structure referenced by rsp. + */ +static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp) +{ + int level = 0; + struct rcu_node *rnp; + + pr_info("rcu_node tree layout dump\n"); + pr_info(" "); + rcu_for_each_node_breadth_first(rsp, rnp) { + if (rnp->level != level) { + pr_cont("\n"); + pr_info(" "); + level = rnp->level; + } + pr_cont("%d:%d ^%d ", rnp->grplo, rnp->grphi, rnp->grpnum); + } + pr_cont("\n"); +} + void __init rcu_init(void) { int cpu; @@ -4119,6 +4176,8 @@ void __init rcu_init(void) rcu_init_geometry(); rcu_init_one(&rcu_bh_state, &rcu_bh_data); rcu_init_one(&rcu_sched_state, &rcu_sched_data); + if (dump_tree) + rcu_dump_rcu_node_tree(&rcu_sched_state); __rcu_init_preempt(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index a69d3dab2ec4..4adb7ca0bf47 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -35,11 +35,33 @@ * In practice, this did work well going from three levels to four. * Of course, your mileage may vary. */ + #define MAX_RCU_LVLS 4 -#define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF) -#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) -#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) -#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) + +#ifdef CONFIG_RCU_FANOUT +#define RCU_FANOUT CONFIG_RCU_FANOUT +#else /* #ifdef CONFIG_RCU_FANOUT */ +# ifdef CONFIG_64BIT +# define RCU_FANOUT 64 +# else +# define RCU_FANOUT 32 +# endif +#endif /* #else #ifdef CONFIG_RCU_FANOUT */ + +#ifdef CONFIG_RCU_FANOUT_LEAF +#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF +#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */ +# ifdef CONFIG_64BIT +# define RCU_FANOUT_LEAF 64 +# else +# define RCU_FANOUT_LEAF 32 +# endif +#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */ + +#define RCU_FANOUT_1 (RCU_FANOUT_LEAF) +#define RCU_FANOUT_2 (RCU_FANOUT_1 * RCU_FANOUT) +#define RCU_FANOUT_3 (RCU_FANOUT_2 * RCU_FANOUT) +#define RCU_FANOUT_4 (RCU_FANOUT_3 * RCU_FANOUT) #if NR_CPUS <= RCU_FANOUT_1 # define RCU_NUM_LVLS 1 @@ -170,7 +192,6 @@ struct rcu_node { /* if there is no such task. If there */ /* is no current expedited grace period, */ /* then there can cannot be any such task. */ -#ifdef CONFIG_RCU_BOOST struct list_head *boost_tasks; /* Pointer to first task that needs to be */ /* priority boosted, or NULL if no priority */ @@ -208,7 +229,6 @@ struct rcu_node { unsigned long n_balk_nos; /* Refused to boost: not sure why, though. */ /* This can happen due to race conditions. */ -#endif /* #ifdef CONFIG_RCU_BOOST */ #ifdef CONFIG_RCU_NOCB_CPU wait_queue_head_t nocb_gp_wq[2]; /* Place for rcu_nocb_kthread() to wait GP. */ @@ -519,14 +539,11 @@ extern struct list_head rcu_struct_flavors; * RCU implementation internal declarations: */ extern struct rcu_state rcu_sched_state; -DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); extern struct rcu_state rcu_bh_state; -DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); #ifdef CONFIG_PREEMPT_RCU extern struct rcu_state rcu_preempt_state; -DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_RCU_BOOST diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 58b1ebdc4387..32664347091a 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -43,7 +43,17 @@ DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); DEFINE_PER_CPU(char, rcu_cpu_has_work); -#endif /* #ifdef CONFIG_RCU_BOOST */ +#else /* #ifdef CONFIG_RCU_BOOST */ + +/* + * Some architectures do not define rt_mutexes, but if !CONFIG_RCU_BOOST, + * all uses are in dead code. Provide a definition to keep the compiler + * happy, but add WARN_ON_ONCE() to complain if used in the wrong place. + * This probably needs to be excluded from -rt builds. + */ +#define rt_mutex_owner(a) ({ WARN_ON_ONCE(1); NULL; }) + +#endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifdef CONFIG_RCU_NOCB_CPU static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ @@ -60,11 +70,11 @@ static void __init rcu_bootup_announce_oddness(void) { if (IS_ENABLED(CONFIG_RCU_TRACE)) pr_info("\tRCU debugfs-based tracing is enabled.\n"); - if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || - (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)) + if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) || + (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32)) pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", - CONFIG_RCU_FANOUT); - if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) + RCU_FANOUT); + if (rcu_fanout_exact) pr_info("\tHierarchical RCU autobalancing is disabled.\n"); if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); @@ -76,10 +86,10 @@ static void __init rcu_bootup_announce_oddness(void) pr_info("\tAdditional per-CPU info printed with stalls.\n"); if (NUM_RCU_LVL_4 != 0) pr_info("\tFour-level hierarchy is enabled.\n"); - if (CONFIG_RCU_FANOUT_LEAF != 16) + if (RCU_FANOUT_LEAF != 16) pr_info("\tBuild-time adjustment of leaf fanout to %d.\n", - CONFIG_RCU_FANOUT_LEAF); - if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) + RCU_FANOUT_LEAF); + if (rcu_fanout_leaf != RCU_FANOUT_LEAF) pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); if (nr_cpu_ids != NR_CPUS) pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); @@ -90,7 +100,8 @@ static void __init rcu_bootup_announce_oddness(void) #ifdef CONFIG_PREEMPT_RCU RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); -static struct rcu_state *rcu_state_p = &rcu_preempt_state; +static struct rcu_state *const rcu_state_p = &rcu_preempt_state; +static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data; static int rcu_preempted_readers_exp(struct rcu_node *rnp); static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, @@ -116,11 +127,11 @@ static void __init rcu_bootup_announce(void) */ static void rcu_preempt_qs(void) { - if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) { + if (!__this_cpu_read(rcu_data_p->passed_quiesce)) { trace_rcu_grace_period(TPS("rcu_preempt"), - __this_cpu_read(rcu_preempt_data.gpnum), + __this_cpu_read(rcu_data_p->gpnum), TPS("cpuqs")); - __this_cpu_write(rcu_preempt_data.passed_quiesce, 1); + __this_cpu_write(rcu_data_p->passed_quiesce, 1); barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ current->rcu_read_unlock_special.b.need_qs = false; } @@ -150,7 +161,7 @@ static void rcu_preempt_note_context_switch(void) !t->rcu_read_unlock_special.b.blocked) { /* Possibly blocking in an RCU read-side critical section. */ - rdp = this_cpu_ptr(rcu_preempt_state.rda); + rdp = this_cpu_ptr(rcu_state_p->rda); rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); @@ -180,10 +191,9 @@ static void rcu_preempt_note_context_switch(void) if ((rnp->qsmask & rdp->grpmask) && rnp->gp_tasks != NULL) { list_add(&t->rcu_node_entry, rnp->gp_tasks->prev); rnp->gp_tasks = &t->rcu_node_entry; -#ifdef CONFIG_RCU_BOOST - if (rnp->boost_tasks != NULL) + if (IS_ENABLED(CONFIG_RCU_BOOST) && + rnp->boost_tasks != NULL) rnp->boost_tasks = rnp->gp_tasks; -#endif /* #ifdef CONFIG_RCU_BOOST */ } else { list_add(&t->rcu_node_entry, &rnp->blkd_tasks); if (rnp->qsmask & rdp->grpmask) @@ -263,9 +273,7 @@ void rcu_read_unlock_special(struct task_struct *t) bool empty_exp_now; unsigned long flags; struct list_head *np; -#ifdef CONFIG_RCU_BOOST bool drop_boost_mutex = false; -#endif /* #ifdef CONFIG_RCU_BOOST */ struct rcu_node *rnp; union rcu_special special; @@ -307,9 +315,11 @@ void rcu_read_unlock_special(struct task_struct *t) t->rcu_read_unlock_special.b.blocked = false; /* - * Remove this task from the list it blocked on. The - * task can migrate while we acquire the lock, but at - * most one time. So at most two passes through loop. + * Remove this task from the list it blocked on. The task + * now remains queued on the rcu_node corresponding to + * the CPU it first blocked on, so the first attempt to + * acquire the task's rcu_node's ->lock will succeed. + * Keep the loop and add a WARN_ON() out of sheer paranoia. */ for (;;) { rnp = t->rcu_blocked_node; @@ -317,6 +327,7 @@ void rcu_read_unlock_special(struct task_struct *t) smp_mb__after_unlock_lock(); if (rnp == t->rcu_blocked_node) break; + WARN_ON_ONCE(1); raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); @@ -331,12 +342,12 @@ void rcu_read_unlock_special(struct task_struct *t) rnp->gp_tasks = np; if (&t->rcu_node_entry == rnp->exp_tasks) rnp->exp_tasks = np; -#ifdef CONFIG_RCU_BOOST - if (&t->rcu_node_entry == rnp->boost_tasks) - rnp->boost_tasks = np; - /* Snapshot ->boost_mtx ownership with rcu_node lock held. */ - drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; -#endif /* #ifdef CONFIG_RCU_BOOST */ + if (IS_ENABLED(CONFIG_RCU_BOOST)) { + if (&t->rcu_node_entry == rnp->boost_tasks) + rnp->boost_tasks = np; + /* Snapshot ->boost_mtx ownership w/rnp->lock held. */ + drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t; + } /* * If this was the last task on the current list, and if @@ -353,24 +364,21 @@ void rcu_read_unlock_special(struct task_struct *t) rnp->grplo, rnp->grphi, !!rnp->gp_tasks); - rcu_report_unblock_qs_rnp(&rcu_preempt_state, - rnp, flags); + rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags); } else { raw_spin_unlock_irqrestore(&rnp->lock, flags); } -#ifdef CONFIG_RCU_BOOST /* Unboost if we were boosted. */ - if (drop_boost_mutex) + if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) rt_mutex_unlock(&rnp->boost_mtx); -#endif /* #ifdef CONFIG_RCU_BOOST */ /* * If this was the last task on the expedited lists, * then we need to report up the rcu_node hierarchy. */ if (!empty_exp && empty_exp_now) - rcu_report_exp_rnp(&rcu_preempt_state, rnp, true); + rcu_report_exp_rnp(rcu_state_p, rnp, true); } else { local_irq_restore(flags); } @@ -390,7 +398,7 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) raw_spin_unlock_irqrestore(&rnp->lock, flags); return; } - t = list_entry(rnp->gp_tasks, + t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) sched_show_task(t); @@ -447,7 +455,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp) if (!rcu_preempt_blocked_readers_cgp(rnp)) return 0; rcu_print_task_stall_begin(rnp); - t = list_entry(rnp->gp_tasks, + t = list_entry(rnp->gp_tasks->prev, struct task_struct, rcu_node_entry); list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) { pr_cont(" P%d", t->pid); @@ -491,8 +499,8 @@ static void rcu_preempt_check_callbacks(void) return; } if (t->rcu_read_lock_nesting > 0 && - __this_cpu_read(rcu_preempt_data.qs_pending) && - !__this_cpu_read(rcu_preempt_data.passed_quiesce)) + __this_cpu_read(rcu_data_p->qs_pending) && + !__this_cpu_read(rcu_data_p->passed_quiesce)) t->rcu_read_unlock_special.b.need_qs = true; } @@ -500,7 +508,7 @@ static void rcu_preempt_check_callbacks(void) static void rcu_preempt_do_callbacks(void) { - rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data)); + rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p)); } #endif /* #ifdef CONFIG_RCU_BOOST */ @@ -510,7 +518,7 @@ static void rcu_preempt_do_callbacks(void) */ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { - __call_rcu(head, func, &rcu_preempt_state, -1, 0); + __call_rcu(head, func, rcu_state_p, -1, 0); } EXPORT_SYMBOL_GPL(call_rcu); @@ -711,7 +719,7 @@ sync_rcu_preempt_exp_init2(struct rcu_state *rsp, struct rcu_node *rnp) void synchronize_rcu_expedited(void) { struct rcu_node *rnp; - struct rcu_state *rsp = &rcu_preempt_state; + struct rcu_state *rsp = rcu_state_p; unsigned long snap; int trycount = 0; @@ -798,7 +806,7 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); */ void rcu_barrier(void) { - _rcu_barrier(&rcu_preempt_state); + _rcu_barrier(rcu_state_p); } EXPORT_SYMBOL_GPL(rcu_barrier); @@ -807,7 +815,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier); */ static void __init __rcu_init_preempt(void) { - rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); + rcu_init_one(rcu_state_p, rcu_data_p); } /* @@ -830,7 +838,8 @@ void exit_rcu(void) #else /* #ifdef CONFIG_PREEMPT_RCU */ -static struct rcu_state *rcu_state_p = &rcu_sched_state; +static struct rcu_state *const rcu_state_p = &rcu_sched_state; +static struct rcu_data __percpu *const rcu_data_p = &rcu_sched_data; /* * Tell them what RCU they are running. @@ -1172,7 +1181,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct sched_param sp; struct task_struct *t; - if (&rcu_preempt_state != rsp) + if (rcu_state_p != rsp) return 0; if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0) @@ -1366,13 +1375,12 @@ static void rcu_prepare_kthreads(int cpu) * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs * any flavor of RCU. */ -#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; - return rcu_cpu_has_callbacks(NULL); + return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) + ? 0 : rcu_cpu_has_callbacks(NULL); } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* * Because we do not have RCU_FAST_NO_HZ, don't bother cleaning up @@ -1479,11 +1487,15 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) * * The caller must have disabled interrupts. */ -#ifndef CONFIG_RCU_NOCB_CPU_ALL int rcu_needs_cpu(unsigned long *dj) { struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) { + *dj = ULONG_MAX; + return 0; + } + /* Snapshot to detect later posting of non-lazy callback. */ rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; @@ -1510,7 +1522,6 @@ int rcu_needs_cpu(unsigned long *dj) } return 0; } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ /* * Prepare a CPU for idle from an RCU perspective. The first major task @@ -1524,7 +1535,6 @@ int rcu_needs_cpu(unsigned long *dj) */ static void rcu_prepare_for_idle(void) { -#ifndef CONFIG_RCU_NOCB_CPU_ALL bool needwake; struct rcu_data *rdp; struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); @@ -1532,6 +1542,9 @@ static void rcu_prepare_for_idle(void) struct rcu_state *rsp; int tne; + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) + return; + /* Handle nohz enablement switches conservatively. */ tne = READ_ONCE(tick_nohz_active); if (tne != rdtp->tick_nohz_enabled_snap) { @@ -1579,7 +1592,6 @@ static void rcu_prepare_for_idle(void) if (needwake) rcu_gp_kthread_wake(rsp); } -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } /* @@ -1589,12 +1601,11 @@ static void rcu_prepare_for_idle(void) */ static void rcu_cleanup_after_idle(void) { -#ifndef CONFIG_RCU_NOCB_CPU_ALL - if (rcu_is_nocb_cpu(smp_processor_id())) + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || + rcu_is_nocb_cpu(smp_processor_id())) return; if (rcu_try_advance_all_cbs()) invoke_rcu_core(); -#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */ } /* @@ -3048,9 +3059,9 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp) if (tick_nohz_full_cpu(smp_processor_id()) && (!rcu_gp_in_progress(rsp) || ULONG_CMP_LT(jiffies, READ_ONCE(rsp->gp_start) + HZ))) - return 1; + return true; #endif /* #ifdef CONFIG_NO_HZ_FULL */ - return 0; + return false; } /* |