Merge branches 'doc.2024.06.06a', 'fixes.2024.07.04a', 'mb.2024.06.28a', 'nocb.2024.06.03a', 'rcu-tasks.2024.06.06a', 'rcutorture.2024.06.06a' and 'srcu.2024.06.18a' into HEAD

doc.2024.06.06a: Documentation updates.
fixes.2024.07.04a: Miscellaneous fixes.
mb.2024.06.28a: Grace-period memory-barrier redundancy removal.
nocb.2024.06.03a: No-CB CPU updates.
rcu-tasks.2024.06.06a: RCU-Tasks updates.
rcutorture.2024.06.06a: Torture-test updates.
srcu.2024.06.18a: SRCU polled-grace-period updates.
This commit is contained in:
Paul E. McKenney 2024-07-04 13:54:17 -07:00
22 changed files with 405 additions and 268 deletions

View file

@ -149,9 +149,9 @@ This case is handled by calls to the strongly ordered
``atomic_add_return()`` read-modify-write atomic operation that
is invoked within ``rcu_dynticks_eqs_enter()`` at idle-entry
time and within ``rcu_dynticks_eqs_exit()`` at idle-exit time.
The grace-period kthread invokes ``rcu_dynticks_snap()`` and
``rcu_dynticks_in_eqs_since()`` (both of which invoke
an ``atomic_add_return()`` of zero) to detect idle CPUs.
The grace-period kthread invokes first ``ct_dynticks_cpu_acquire()``
(preceded by a full memory barrier) and ``rcu_dynticks_in_eqs_since()``
(both of which rely on acquire semantics) to detect idle CPUs.
+-----------------------------------------------------------------------+
| **Quick Quiz**: |

View file

@ -5018,6 +5018,14 @@
the ->nocb_bypass queue. The definition of "too
many" is supplied by this kernel boot parameter.
rcutree.nohz_full_patience_delay= [KNL]
On callback-offloaded (rcu_nocbs) CPUs, avoid
disturbing RCU unless the grace period has
reached the specified age in milliseconds.
Defaults to zero. Large values will be capped
at five seconds. All values will be rounded down
to the nearest value representable by jiffies.
rcutree.qhimark= [KNL]
Set threshold of queued RCU callbacks beyond which
batch limiting is disabled.

View file

@ -18868,6 +18868,7 @@ M: Neeraj Upadhyay <neeraj.upadhyay@kernel.org> (kernel/rcu/tasks.h)
M: Joel Fernandes <joel@joelfernandes.org>
M: Josh Triplett <josh@joshtriplett.org>
M: Boqun Feng <boqun.feng@gmail.com>
M: Uladzislau Rezki <urezki@gmail.com>
R: Steven Rostedt <rostedt@goodmis.org>
R: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
R: Lai Jiangshan <jiangshanlai@gmail.com>

View file

@ -80,36 +80,35 @@ struct rcu_cblist {
* | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED |
* | |
* | Callbacks processed by rcu_core() from softirqs or local |
* | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads, |
* | allowing nocb_timer to be armed. |
* | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads. |
* ----------------------------------------------------------------------------
* |
* v
* -----------------------------------
* | |
* v v
* --------------------------------------- ----------------------------------|
* | SEGCBLIST_RCU_CORE | | | SEGCBLIST_RCU_CORE | |
* | SEGCBLIST_LOCKING | | | SEGCBLIST_LOCKING | |
* | SEGCBLIST_OFFLOADED | | | SEGCBLIST_OFFLOADED | |
* | SEGCBLIST_KTHREAD_CB | | SEGCBLIST_KTHREAD_GP |
* | | | |
* | | | |
* | CB kthread woke up and | | GP kthread woke up and |
* | acknowledged SEGCBLIST_OFFLOADED. | | acknowledged SEGCBLIST_OFFLOADED|
* | Processes callbacks concurrently | | |
* | with rcu_core(), holding | | |
* | nocb_lock. | | |
* --------------------------------------- -----------------------------------
* | |
* -----------------------------------
* ----------------------------------------------------------------------------
* | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED |
* | + unparked CB kthread |
* | |
* | CB kthread got unparked and processes callbacks concurrently with |
* | rcu_core(), holding nocb_lock. |
* ---------------------------------------------------------------------------
* |
* v
* ---------------------------------------------------------------------------|
* | SEGCBLIST_RCU_CORE | |
* | SEGCBLIST_LOCKING | |
* | SEGCBLIST_OFFLOADED | |
* | SEGCBLIST_KTHREAD_GP |
* | + unparked CB kthread |
* | |
* | GP kthread woke up and acknowledged nocb_lock. |
* ---------------------------------------- -----------------------------------
* |
* v
* |--------------------------------------------------------------------------|
* | SEGCBLIST_LOCKING | |
* | SEGCBLIST_OFFLOADED | |
* | SEGCBLIST_LOCKING | |
* | SEGCBLIST_OFFLOADED | |
* | SEGCBLIST_KTHREAD_GP | |
* | SEGCBLIST_KTHREAD_CB |
* | + unparked CB kthread |
* | |
* | Kthreads handle callbacks holding nocb_lock, local rcu_core() stops |
* | handling callbacks. Enable bypass queueing. |
@ -125,8 +124,8 @@ struct rcu_cblist {
* |--------------------------------------------------------------------------|
* | SEGCBLIST_LOCKING | |
* | SEGCBLIST_OFFLOADED | |
* | SEGCBLIST_KTHREAD_CB | |
* | SEGCBLIST_KTHREAD_GP |
* | + unparked CB kthread |
* | |
* | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() |
* | ignores callbacks. Bypass enqueue is enabled. |
@ -137,11 +136,11 @@ struct rcu_cblist {
* | SEGCBLIST_RCU_CORE | |
* | SEGCBLIST_LOCKING | |
* | SEGCBLIST_OFFLOADED | |
* | SEGCBLIST_KTHREAD_CB | |
* | SEGCBLIST_KTHREAD_GP |
* | + unparked CB kthread |
* | |
* | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() |
* | handles callbacks concurrently. Bypass enqueue is enabled. |
* | handles callbacks concurrently. Bypass enqueue is disabled. |
* | Invoke RCU core so we make sure not to preempt it in the middle with |
* | leaving some urgent work unattended within a jiffy. |
* ----------------------------------------------------------------------------
@ -150,42 +149,31 @@ struct rcu_cblist {
* |--------------------------------------------------------------------------|
* | SEGCBLIST_RCU_CORE | |
* | SEGCBLIST_LOCKING | |
* | SEGCBLIST_KTHREAD_CB | |
* | SEGCBLIST_KTHREAD_GP |
* | + unparked CB kthread |
* | |
* | CB/GP kthreads and local rcu_core() handle callbacks concurrently |
* | holding nocb_lock. Wake up CB and GP kthreads if necessary. Disable |
* | bypass enqueue. |
* | holding nocb_lock. Wake up GP kthread if necessary. |
* ----------------------------------------------------------------------------
* |
* v
* -----------------------------------
* | |
* v v
* ---------------------------------------------------------------------------|
* | | |
* | SEGCBLIST_RCU_CORE | | SEGCBLIST_RCU_CORE | |
* | SEGCBLIST_LOCKING | | SEGCBLIST_LOCKING | |
* | SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP |
* | | |
* | GP kthread woke up and | CB kthread woke up and |
* | acknowledged the fact that | acknowledged the fact that |
* | SEGCBLIST_OFFLOADED got cleared. | SEGCBLIST_OFFLOADED got cleared. |
* | | The CB kthread goes to sleep |
* | The callbacks from the target CPU | until it ever gets re-offloaded. |
* | will be ignored from the GP kthread | |
* | loop. | |
* |--------------------------------------------------------------------------|
* | SEGCBLIST_RCU_CORE | |
* | SEGCBLIST_LOCKING | |
* | + unparked CB kthread |
* | |
* | GP kthread woke up and acknowledged the fact that SEGCBLIST_OFFLOADED |
* | got cleared. The callbacks from the target CPU will be ignored from the|
* | GP kthread loop. |
* ----------------------------------------------------------------------------
* | |
* -----------------------------------
* |
* v
* ----------------------------------------------------------------------------
* | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING |
* | + parked CB kthread |
* | |
* | Callbacks processed by rcu_core() from softirqs or local |
* | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed. |
* | Flush pending nocb_timer. Flush nocb bypass callbacks. |
* | CB kthread is parked. Callbacks processed by rcu_core() from softirqs or |
* | local rcuc kthread, while holding nocb_lock. |
* ----------------------------------------------------------------------------
* |
* v

View file

@ -209,7 +209,6 @@ void synchronize_rcu_tasks_rude(void);
#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false)
void exit_tasks_rcu_start(void);
void exit_tasks_rcu_stop(void);
void exit_tasks_rcu_finish(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
#define rcu_tasks_classic_qs(t, preempt) do { } while (0)
@ -218,7 +217,6 @@ void exit_tasks_rcu_finish(void);
#define call_rcu_tasks call_rcu
#define synchronize_rcu_tasks synchronize_rcu
static inline void exit_tasks_rcu_start(void) { }
static inline void exit_tasks_rcu_stop(void) { }
static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
@ -421,11 +419,71 @@ static inline void rcu_preempt_sleep_check(void) { }
"Illegal context switch in RCU-sched read-side critical section"); \
} while (0)
// See RCU_LOCKDEP_WARN() for an explanation of the double call to
// debug_lockdep_rcu_enabled().
static inline bool lockdep_assert_rcu_helper(bool c)
{
return debug_lockdep_rcu_enabled() &&
(c || !rcu_is_watching() || !rcu_lockdep_current_cpu_online()) &&
debug_lockdep_rcu_enabled();
}
/**
* lockdep_assert_in_rcu_read_lock - WARN if not protected by rcu_read_lock()
*
* Splats if lockdep is enabled and there is no rcu_read_lock() in effect.
*/
#define lockdep_assert_in_rcu_read_lock() \
WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map)))
/**
* lockdep_assert_in_rcu_read_lock_bh - WARN if not protected by rcu_read_lock_bh()
*
* Splats if lockdep is enabled and there is no rcu_read_lock_bh() in effect.
* Note that local_bh_disable() and friends do not suffice here, instead an
* actual rcu_read_lock_bh() is required.
*/
#define lockdep_assert_in_rcu_read_lock_bh() \
WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_bh_lock_map)))
/**
* lockdep_assert_in_rcu_read_lock_sched - WARN if not protected by rcu_read_lock_sched()
*
* Splats if lockdep is enabled and there is no rcu_read_lock_sched()
* in effect. Note that preempt_disable() and friends do not suffice here,
* instead an actual rcu_read_lock_sched() is required.
*/
#define lockdep_assert_in_rcu_read_lock_sched() \
WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_sched_lock_map)))
/**
* lockdep_assert_in_rcu_reader - WARN if not within some type of RCU reader
*
* Splats if lockdep is enabled and there is no RCU reader of any
* type in effect. Note that regions of code protected by things like
* preempt_disable, local_bh_disable(), and local_irq_disable() all qualify
* as RCU readers.
*
* Note that this will never trigger in PREEMPT_NONE or PREEMPT_VOLUNTARY
* kernels that are not also built with PREEMPT_COUNT. But if you have
* lockdep enabled, you might as well also enable PREEMPT_COUNT.
*/
#define lockdep_assert_in_rcu_reader() \
WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map) && \
!lock_is_held(&rcu_bh_lock_map) && \
!lock_is_held(&rcu_sched_lock_map) && \
preemptible()))
#else /* #ifdef CONFIG_PROVE_RCU */
#define RCU_LOCKDEP_WARN(c, s) do { } while (0 && (c))
#define rcu_sleep_check() do { } while (0)
#define lockdep_assert_in_rcu_read_lock() do { } while (0)
#define lockdep_assert_in_rcu_read_lock_bh() do { } while (0)
#define lockdep_assert_in_rcu_read_lock_sched() do { } while (0)
#define lockdep_assert_in_rcu_reader() do { } while (0)
#endif /* #else #ifdef CONFIG_PROVE_RCU */
/*

View file

@ -57,10 +57,45 @@ void cleanup_srcu_struct(struct srcu_struct *ssp);
int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
void synchronize_srcu(struct srcu_struct *ssp);
#define SRCU_GET_STATE_COMPLETED 0x1
/**
* get_completed_synchronize_srcu - Return a pre-completed polled state cookie
*
* Returns a value that poll_state_synchronize_srcu() will always treat
* as a cookie whose grace period has already completed.
*/
static inline unsigned long get_completed_synchronize_srcu(void)
{
return SRCU_GET_STATE_COMPLETED;
}
unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp);
unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp);
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie);
// Maximum number of unsigned long values corresponding to
// not-yet-completed SRCU grace periods.
#define NUM_ACTIVE_SRCU_POLL_OLDSTATE 2
/**
* same_state_synchronize_srcu - Are two old-state values identical?
* @oldstate1: First old-state value.
* @oldstate2: Second old-state value.
*
* The two old-state values must have been obtained from either
* get_state_synchronize_srcu(), start_poll_synchronize_srcu(), or
* get_completed_synchronize_srcu(). Returns @true if the two values are
* identical and @false otherwise. This allows structures whose lifetimes
* are tracked by old-state values to push these values to a list header,
* allowing those structures to be slightly smaller.
*/
static inline bool same_state_synchronize_srcu(unsigned long oldstate1, unsigned long oldstate2)
{
return oldstate1 == oldstate2;
}
#ifdef CONFIG_NEED_SRCU_NMI_SAFE
int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp);
void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp);

View file

@ -248,24 +248,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
set_current_state(TASK_INTERRUPTIBLE);
if (pid_ns->pid_allocated == init_pids)
break;
/*
* Release tasks_rcu_exit_srcu to avoid following deadlock:
*
* 1) TASK A unshare(CLONE_NEWPID)
* 2) TASK A fork() twice -> TASK B (child reaper for new ns)
* and TASK C
* 3) TASK B exits, kills TASK C, waits for TASK A to reap it
* 4) TASK A calls synchronize_rcu_tasks()
* -> synchronize_srcu(tasks_rcu_exit_srcu)
* 5) *DEADLOCK*
*
* It is considered safe to release tasks_rcu_exit_srcu here
* because we assume the current task can not be concurrently
* reaped at this point.
*/
exit_tasks_rcu_stop();
schedule();
exit_tasks_rcu_start();
}
__set_current_state(TASK_RUNNING);

View file

@ -42,6 +42,7 @@
#include "rcu.h"
MODULE_DESCRIPTION("Read-Copy Update module-based scalability-test facility");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");

View file

@ -51,6 +51,7 @@
#include "rcu.h"
MODULE_DESCRIPTION("Read-Copy Update module-based torture test facility");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
@ -390,6 +391,7 @@ struct rcu_torture_ops {
int extendables;
int slow_gps;
int no_pi_lock;
int debug_objects;
const char *name;
};
@ -577,6 +579,7 @@ static struct rcu_torture_ops rcu_ops = {
.irq_capable = 1,
.can_boost = IS_ENABLED(CONFIG_RCU_BOOST),
.extendables = RCUTORTURE_MAX_EXTEND,
.debug_objects = 1,
.name = "rcu"
};
@ -747,6 +750,7 @@ static struct rcu_torture_ops srcu_ops = {
.cbflood_max = 50000,
.irq_capable = 1,
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.debug_objects = 1,
.name = "srcu"
};
@ -786,6 +790,7 @@ static struct rcu_torture_ops srcud_ops = {
.cbflood_max = 50000,
.irq_capable = 1,
.no_pi_lock = IS_ENABLED(CONFIG_TINY_SRCU),
.debug_objects = 1,
.name = "srcud"
};
@ -2626,7 +2631,7 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp)
spin_lock_irqsave(&rfp->rcu_fwd_lock, flags);
rfcpp = rfp->rcu_fwd_cb_tail;
rfp->rcu_fwd_cb_tail = &rfcp->rfc_next;
WRITE_ONCE(*rfcpp, rfcp);
smp_store_release(rfcpp, rfcp);
WRITE_ONCE(rfp->n_launders_cb, rfp->n_launders_cb + 1);
i = ((jiffies - rfp->rcu_fwd_startat) / (HZ / FWD_CBS_HIST_DIV));
if (i >= ARRAY_SIZE(rfp->n_launders_hist))
@ -3455,7 +3460,6 @@ rcu_torture_cleanup(void)
cur_ops->gp_slow_unregister(NULL);
}
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
static void rcu_torture_leak_cb(struct rcu_head *rhp)
{
}
@ -3473,7 +3477,6 @@ static void rcu_torture_err_cb(struct rcu_head *rhp)
*/
pr_alert("%s: duplicated callback was invoked.\n", KBUILD_MODNAME);
}
#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
/*
* Verify that double-free causes debug-objects to complain, but only
@ -3482,39 +3485,43 @@ static void rcu_torture_err_cb(struct rcu_head *rhp)
*/
static void rcu_test_debug_objects(void)
{
#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
struct rcu_head rh1;
struct rcu_head rh2;
int idx;
if (!IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD)) {
pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_%s()\n",
KBUILD_MODNAME, cur_ops->name);
return;
}
if (WARN_ON_ONCE(cur_ops->debug_objects &&
(!cur_ops->call || !cur_ops->cb_barrier)))
return;
struct rcu_head *rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
init_rcu_head_on_stack(&rh1);
init_rcu_head_on_stack(&rh2);
pr_alert("%s: WARN: Duplicate call_rcu() test starting.\n", KBUILD_MODNAME);
pr_alert("%s: WARN: Duplicate call_%s() test starting.\n", KBUILD_MODNAME, cur_ops->name);
/* Try to queue the rh2 pair of callbacks for the same grace period. */
preempt_disable(); /* Prevent preemption from interrupting test. */
rcu_read_lock(); /* Make it impossible to finish a grace period. */
call_rcu_hurry(&rh1, rcu_torture_leak_cb); /* Start grace period. */
local_irq_disable(); /* Make it harder to start a new grace period. */
call_rcu_hurry(&rh2, rcu_torture_leak_cb);
call_rcu_hurry(&rh2, rcu_torture_err_cb); /* Duplicate callback. */
idx = cur_ops->readlock(); /* Make it impossible to finish a grace period. */
cur_ops->call(&rh1, rcu_torture_leak_cb); /* Start grace period. */
cur_ops->call(&rh2, rcu_torture_leak_cb);
cur_ops->call(&rh2, rcu_torture_err_cb); /* Duplicate callback. */
if (rhp) {
call_rcu_hurry(rhp, rcu_torture_leak_cb);
call_rcu_hurry(rhp, rcu_torture_err_cb); /* Another duplicate callback. */
cur_ops->call(rhp, rcu_torture_leak_cb);
cur_ops->call(rhp, rcu_torture_err_cb); /* Another duplicate callback. */
}
local_irq_enable();
rcu_read_unlock();
preempt_enable();
cur_ops->readunlock(idx);
/* Wait for them all to get done so we can safely return. */
rcu_barrier();
pr_alert("%s: WARN: Duplicate call_rcu() test complete.\n", KBUILD_MODNAME);
cur_ops->cb_barrier();
pr_alert("%s: WARN: Duplicate call_%s() test complete.\n", KBUILD_MODNAME, cur_ops->name);
destroy_rcu_head_on_stack(&rh1);
destroy_rcu_head_on_stack(&rh2);
kfree(rhp);
#else /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
pr_alert("%s: !CONFIG_DEBUG_OBJECTS_RCU_HEAD, not testing duplicate call_rcu()\n", KBUILD_MODNAME);
#endif /* #else #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */
}
static void rcutorture_sync(void)

View file

@ -63,6 +63,7 @@ do { \
#define SCALEOUT_ERRSTRING(s, x...) pr_alert("%s" SCALE_FLAG "!!! " s "\n", scale_type, ## x)
MODULE_DESCRIPTION("Scalability test for object reference mechanisms");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Joel Fernandes (Google) <joel@joelfernandes.org>");

View file

@ -277,7 +277,8 @@ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
unsigned long cur_s = READ_ONCE(ssp->srcu_idx);
barrier();
return ULONG_CMP_GE(cur_s, cookie) || ULONG_CMP_LT(cur_s, cookie - 3);
return cookie == SRCU_GET_STATE_COMPLETED ||
ULONG_CMP_GE(cur_s, cookie) || ULONG_CMP_LT(cur_s, cookie - 3);
}
EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu);

View file

@ -667,7 +667,10 @@ void cleanup_srcu_struct(struct srcu_struct *ssp)
pr_info("%s: Active srcu_struct %p read state: %d gp state: %lu/%lu\n",
__func__, ssp, rcu_seq_state(READ_ONCE(sup->srcu_gp_seq)),
rcu_seq_current(&sup->srcu_gp_seq), sup->srcu_gp_seq_needed);
return; /* Caller forgot to stop doing call_srcu()? */
return; // Caller forgot to stop doing call_srcu()?
// Or caller invoked start_poll_synchronize_srcu()
// and then cleanup_srcu_struct() before that grace
// period ended?
}
kfree(sup->node);
sup->node = NULL;
@ -845,7 +848,6 @@ static void srcu_gp_end(struct srcu_struct *ssp)
bool cbs;
bool last_lvl;
int cpu;
unsigned long flags;
unsigned long gpseq;
int idx;
unsigned long mask;
@ -907,12 +909,12 @@ static void srcu_gp_end(struct srcu_struct *ssp)
if (!(gpseq & counter_wrap_check))
for_each_possible_cpu(cpu) {
sdp = per_cpu_ptr(ssp->sda, cpu);
spin_lock_irqsave_rcu_node(sdp, flags);
spin_lock_irq_rcu_node(sdp);
if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100))
sdp->srcu_gp_seq_needed = gpseq;
if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100))
sdp->srcu_gp_seq_needed_exp = gpseq;
spin_unlock_irqrestore_rcu_node(sdp, flags);
spin_unlock_irq_rcu_node(sdp);
}
/* Callback initiation done, allow grace periods after next. */
@ -1540,7 +1542,8 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu);
*/
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
{
if (!rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
if (cookie != SRCU_GET_STATE_COMPLETED &&
!rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie))
return false;
// Ensure that the end of the SRCU grace period happens before
// any subsequent code that the caller might execute.

View file

@ -122,7 +122,7 @@ void rcu_sync_enter(struct rcu_sync *rsp)
* we are called at early boot time but this shouldn't happen.
*/
}
WRITE_ONCE(rsp->gp_count, rsp->gp_count + 1);
rsp->gp_count++;
spin_unlock_irq(&rsp->rss_lock);
if (gp_state == GP_IDLE) {
@ -151,15 +151,11 @@ void rcu_sync_enter(struct rcu_sync *rsp)
*/
void rcu_sync_exit(struct rcu_sync *rsp)
{
int gpc;
WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_IDLE);
WARN_ON_ONCE(READ_ONCE(rsp->gp_count) == 0);
spin_lock_irq(&rsp->rss_lock);
gpc = rsp->gp_count - 1;
WRITE_ONCE(rsp->gp_count, gpc);
if (!gpc) {
WARN_ON_ONCE(rsp->gp_count == 0);
if (!--rsp->gp_count) {
if (rsp->gp_state == GP_PASSED) {
WRITE_ONCE(rsp->gp_state, GP_EXIT);
rcu_sync_call(rsp);
@ -178,10 +174,10 @@ void rcu_sync_dtor(struct rcu_sync *rsp)
{
int gp_state;
WARN_ON_ONCE(READ_ONCE(rsp->gp_count));
WARN_ON_ONCE(READ_ONCE(rsp->gp_state) == GP_PASSED);
spin_lock_irq(&rsp->rss_lock);
WARN_ON_ONCE(rsp->gp_count);
if (rsp->gp_state == GP_REPLAY)
WRITE_ONCE(rsp->gp_state, GP_EXIT);
gp_state = rsp->gp_state;

View file

@ -858,7 +858,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
// not know to synchronize with this RCU Tasks grace period) have
// completed exiting. The synchronize_rcu() in rcu_tasks_postgp()
// will take care of any tasks stuck in the non-preemptible region
// of do_exit() following its call to exit_tasks_rcu_stop().
// of do_exit() following its call to exit_tasks_rcu_finish().
// check_all_holdout_tasks(), repeatedly until holdout list is empty:
// Scans the holdout list, attempting to identify a quiescent state
// for each task on the list. If there is a quiescent state, the
@ -1220,7 +1220,7 @@ void exit_tasks_rcu_start(void)
* Remove the task from the "yet another list" because do_exit() is now
* non-preemptible, allowing synchronize_rcu() to wait beyond this point.
*/
void exit_tasks_rcu_stop(void)
void exit_tasks_rcu_finish(void)
{
unsigned long flags;
struct rcu_tasks_percpu *rtpcp;
@ -1231,22 +1231,12 @@ void exit_tasks_rcu_stop(void)
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
list_del_init(&t->rcu_tasks_exit_list);
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
}
/*
* Contribute to protect against tasklist scan blind spot while the
* task is exiting and may be removed from the tasklist. See
* corresponding synchronize_srcu() for further details.
*/
void exit_tasks_rcu_finish(void)
{
exit_tasks_rcu_stop();
exit_tasks_rcu_finish_trace(current);
exit_tasks_rcu_finish_trace(t);
}
#else /* #ifdef CONFIG_TASKS_RCU */
void exit_tasks_rcu_start(void) { }
void exit_tasks_rcu_stop(void) { }
void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
#endif /* #else #ifdef CONFIG_TASKS_RCU */
@ -1757,6 +1747,16 @@ static void rcu_tasks_trace_pregp_step(struct list_head *hop)
// allow safe access to the hop list.
for_each_online_cpu(cpu) {
rcu_read_lock();
// Note that cpu_curr_snapshot() picks up the target
// CPU's current task while its runqueue is locked with
// an smp_mb__after_spinlock(). This ensures that either
// the grace-period kthread will see that task's read-side
// critical section or the task will see the updater's pre-GP
// accesses. The trailing smp_mb() in cpu_curr_snapshot()
// does not currently play a role other than simplify
// that function's ordering semantics. If these simplified
// ordering semantics continue to be redundant, that smp_mb()
// might be removed.
t = cpu_curr_snapshot(cpu);
if (rcu_tasks_trace_pertask_prep(t, true))
trc_add_holdout(t, hop);

View file

@ -96,6 +96,7 @@ static struct rcu_state rcu_state = {
.ofl_lock = __ARCH_SPIN_LOCK_UNLOCKED,
.srs_cleanup_work = __WORK_INITIALIZER(rcu_state.srs_cleanup_work,
rcu_sr_normal_gp_cleanup_work),
.srs_cleanups_pending = ATOMIC_INIT(0),
};
/* Dump rcu_node combining tree at boot to verify correct setup. */
@ -175,6 +176,9 @@ static int gp_init_delay;
module_param(gp_init_delay, int, 0444);
static int gp_cleanup_delay;
module_param(gp_cleanup_delay, int, 0444);
static int nohz_full_patience_delay;
module_param(nohz_full_patience_delay, int, 0444);
static int nohz_full_patience_delay_jiffies;
// Add delay to rcu_read_unlock() for strict grace periods.
static int rcu_unlock_delay;
@ -295,16 +299,6 @@ static void rcu_dynticks_eqs_online(void)
ct_state_inc(RCU_DYNTICKS_IDX);
}
/*
* Snapshot the ->dynticks counter with full ordering so as to allow
* stable comparison of this counter with past and future snapshots.
*/
static int rcu_dynticks_snap(int cpu)
{
smp_mb(); // Fundamental RCU ordering guarantee.
return ct_dynticks_cpu_acquire(cpu);
}
/*
* Return true if the snapshot returned from rcu_dynticks_snap()
* indicates that RCU is in an extended quiescent state.
@ -321,7 +315,15 @@ static bool rcu_dynticks_in_eqs(int snap)
*/
static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
{
return snap != rcu_dynticks_snap(rdp->cpu);
/*
* The first failing snapshot is already ordered against the accesses
* performed by the remote CPU after it exits idle.
*
* The second snapshot therefore only needs to order against accesses
* performed by the remote CPU prior to entering idle and therefore can
* rely solely on acquire semantics.
*/
return snap != ct_dynticks_cpu_acquire(rdp->cpu);
}
/*
@ -769,7 +771,18 @@ static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
*/
static int dyntick_save_progress_counter(struct rcu_data *rdp)
{
rdp->dynticks_snap = rcu_dynticks_snap(rdp->cpu);
/*
* Full ordering between remote CPU's post idle accesses and updater's
* accesses prior to current GP (and also the started GP sequence number)
* is enforced by rcu_seq_start() implicit barrier and even further by
* smp_mb__after_unlock_lock() barriers chained all the way throughout the
* rnp locking tree since rcu_gp_init() and up to the current leaf rnp
* locking.
*
* Ordering between remote CPU's pre idle accesses and post grace period
* updater's accesses is enforced by the below acquire semantic.
*/
rdp->dynticks_snap = ct_dynticks_cpu_acquire(rdp->cpu);
if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));
rcu_gpnum_ovf(rdp->mynode, rdp);
@ -1660,6 +1673,9 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
rcu_sr_put_wait_head(rcu);
}
/* Order list manipulations with atomic access. */
atomic_dec_return_release(&rcu_state.srs_cleanups_pending);
}
/*
@ -1667,7 +1683,7 @@ static void rcu_sr_normal_gp_cleanup_work(struct work_struct *work)
*/
static void rcu_sr_normal_gp_cleanup(void)
{
struct llist_node *wait_tail, *next, *rcu;
struct llist_node *wait_tail, *next = NULL, *rcu = NULL;
int done = 0;
wait_tail = rcu_state.srs_wait_tail;
@ -1693,16 +1709,34 @@ static void rcu_sr_normal_gp_cleanup(void)
break;
}
// concurrent sr_normal_gp_cleanup work might observe this update.
smp_store_release(&rcu_state.srs_done_tail, wait_tail);
/*
* Fast path, no more users to process except putting the second last
* wait head if no inflight-workers. If there are in-flight workers,
* they will remove the last wait head.
*
* Note that the ACQUIRE orders atomic access with list manipulation.
*/
if (wait_tail->next && wait_tail->next->next == NULL &&
rcu_sr_is_wait_head(wait_tail->next) &&
!atomic_read_acquire(&rcu_state.srs_cleanups_pending)) {
rcu_sr_put_wait_head(wait_tail->next);
wait_tail->next = NULL;
}
/* Concurrent sr_normal_gp_cleanup work might observe this update. */
ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_done_tail);
smp_store_release(&rcu_state.srs_done_tail, wait_tail);
/*
* We schedule a work in order to perform a final processing
* of outstanding users(if still left) and releasing wait-heads
* added by rcu_sr_normal_gp_init() call.
*/
queue_work(sync_wq, &rcu_state.srs_cleanup_work);
if (wait_tail->next) {
atomic_inc(&rcu_state.srs_cleanups_pending);
if (!queue_work(sync_wq, &rcu_state.srs_cleanup_work))
atomic_dec(&rcu_state.srs_cleanups_pending);
}
}
/*
@ -1810,7 +1844,7 @@ static noinline_for_stack bool rcu_gp_init(void)
WRITE_ONCE(rcu_state.gp_state, RCU_GP_ONOFF);
/* Exclude CPU hotplug operations. */
rcu_for_each_leaf_node(rnp) {
local_irq_save(flags);
local_irq_disable();
arch_spin_lock(&rcu_state.ofl_lock);
raw_spin_lock_rcu_node(rnp);
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
@ -1818,7 +1852,7 @@ static noinline_for_stack bool rcu_gp_init(void)
/* Nothing to do on this leaf rcu_node structure. */
raw_spin_unlock_rcu_node(rnp);
arch_spin_unlock(&rcu_state.ofl_lock);
local_irq_restore(flags);
local_irq_enable();
continue;
}
@ -1855,7 +1889,7 @@ static noinline_for_stack bool rcu_gp_init(void)
raw_spin_unlock_rcu_node(rnp);
arch_spin_unlock(&rcu_state.ofl_lock);
local_irq_restore(flags);
local_irq_enable();
}
rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */
@ -4313,11 +4347,15 @@ static int rcu_pending(int user)
return 1;
/* Is this a nohz_full CPU in userspace or idle? (Ignore RCU if so.) */
if ((user || rcu_is_cpu_rrupt_from_idle()) && rcu_nohz_full_cpu())
gp_in_progress = rcu_gp_in_progress();
if ((user || rcu_is_cpu_rrupt_from_idle() ||
(gp_in_progress &&
time_before(jiffies, READ_ONCE(rcu_state.gp_start) +
nohz_full_patience_delay_jiffies))) &&
rcu_nohz_full_cpu())
return 0;
/* Is the RCU core waiting for a quiescent state from this CPU? */
gp_in_progress = rcu_gp_in_progress();
if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm && gp_in_progress)
return 1;
@ -4767,7 +4805,7 @@ rcu_boot_init_percpu_data(int cpu)
rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
INIT_WORK(&rdp->strict_work, strict_work_handler);
WARN_ON_ONCE(ct->dynticks_nesting != 1);
WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu)));
WARN_ON_ONCE(rcu_dynticks_in_eqs(ct_dynticks_cpu(cpu)));
rdp->barrier_seq_snap = rcu_state.barrier_sequence;
rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
rdp->rcu_ofl_gp_state = RCU_GP_CLEANED;
@ -5110,11 +5148,15 @@ void rcutree_migrate_callbacks(int cpu)
struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
bool needwake;
if (rcu_rdp_is_offloaded(rdp) ||
rcu_segcblist_empty(&rdp->cblist))
return; /* No callbacks to migrate. */
if (rcu_rdp_is_offloaded(rdp))
return;
raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
if (rcu_segcblist_empty(&rdp->cblist)) {
raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
return; /* No callbacks to migrate. */
}
WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
rcu_barrier_entrain(rdp);
my_rdp = this_cpu_ptr(&rcu_data);

View file

@ -223,7 +223,6 @@ struct rcu_data {
struct swait_queue_head nocb_state_wq; /* For offloading state changes */
struct task_struct *nocb_gp_kthread;
raw_spinlock_t nocb_lock; /* Guard following pair of fields. */
atomic_t nocb_lock_contended; /* Contention experienced. */
int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
struct timer_list nocb_timer; /* Enforce finite deferral. */
unsigned long nocb_gp_adv_time; /* Last call_rcu() CB adv (jiffies). */
@ -420,6 +419,7 @@ struct rcu_state {
struct llist_node *srs_done_tail; /* ready for GP users. */
struct sr_wait_node srs_wait_nodes[SR_NORMAL_GP_WAIT_HEAD_MAX];
struct work_struct srs_cleanup_work;
atomic_t srs_cleanups_pending; /* srs inflight worker cleanups. */
};
/* Values for rcu_state structure's gp_flags field. */

View file

@ -265,7 +265,12 @@ static bool sync_exp_work_done(unsigned long s)
{
if (rcu_exp_gp_seq_done(s)) {
trace_rcu_exp_grace_period(rcu_state.name, s, TPS("done"));
smp_mb(); /* Ensure test happens before caller kfree(). */
/*
* Order GP completion with preceding accesses. Order also GP
* completion with post GP update side accesses. Pairs with
* rcu_seq_end().
*/
smp_mb();
return true;
}
return false;
@ -357,7 +362,21 @@ static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp)
!(rnp->qsmaskinitnext & mask)) {
mask_ofl_test |= mask;
} else {
snap = rcu_dynticks_snap(cpu);
/*
* Full ordering between remote CPU's post idle accesses
* and updater's accesses prior to current GP (and also
* the started GP sequence number) is enforced by
* rcu_seq_start() implicit barrier, relayed by kworkers
* locking and even further by smp_mb__after_unlock_lock()
* barriers chained all the way throughout the rnp locking
* tree since sync_exp_reset_tree() and up to the current
* leaf rnp locking.
*
* Ordering between remote CPU's pre idle accesses and
* post grace period updater's accesses is enforced by the
* below acquire semantic.
*/
snap = ct_dynticks_cpu_acquire(cpu);
if (rcu_dynticks_in_eqs(snap))
mask_ofl_test |= mask;
else
@ -953,7 +972,6 @@ void synchronize_rcu_expedited(void)
rnp = rcu_get_root();
wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
sync_exp_work_done(s));
smp_mb(); /* Work actions happen before return. */
/* Let the next expedited grace period start. */
mutex_unlock(&rcu_state.exp_mutex);

View file

@ -91,8 +91,7 @@ module_param(nocb_nobypass_lim_per_jiffy, int, 0);
/*
* Acquire the specified rcu_data structure's ->nocb_bypass_lock. If the
* lock isn't immediately available, increment ->nocb_lock_contended to
* flag the contention.
* lock isn't immediately available, perform minimal sanity check.
*/
static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
__acquires(&rdp->nocb_bypass_lock)
@ -100,29 +99,12 @@ static void rcu_nocb_bypass_lock(struct rcu_data *rdp)
lockdep_assert_irqs_disabled();
if (raw_spin_trylock(&rdp->nocb_bypass_lock))
return;
atomic_inc(&rdp->nocb_lock_contended);
/*
* Contention expected only when local enqueue collide with
* remote flush from kthreads.
*/
WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
smp_mb__after_atomic(); /* atomic_inc() before lock. */
raw_spin_lock(&rdp->nocb_bypass_lock);
smp_mb__before_atomic(); /* atomic_dec() after lock. */
atomic_dec(&rdp->nocb_lock_contended);
}
/*
* Spinwait until the specified rcu_data structure's ->nocb_lock is
* not contended. Please note that this is extremely special-purpose,
* relying on the fact that at most two kthreads and one CPU contend for
* this lock, and also that the two kthreads are guaranteed to have frequent
* grace-period-duration time intervals between successive acquisitions
* of the lock. This allows us to use an extremely simple throttling
* mechanism, and further to apply it only to the CPU doing floods of
* call_rcu() invocations. Don't try this at home!
*/
static void rcu_nocb_wait_contended(struct rcu_data *rdp)
{
WARN_ON_ONCE(smp_processor_id() != rdp->cpu);
while (WARN_ON_ONCE(atomic_read(&rdp->nocb_lock_contended)))
cpu_relax();
}
/*
@ -510,7 +492,6 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
}
// We need to use the bypass.
rcu_nocb_wait_contended(rdp);
rcu_nocb_bypass_lock(rdp);
ncbs = rcu_cblist_n_cbs(&rdp->nocb_bypass);
rcu_segcblist_inc_len(&rdp->cblist); /* Must precede enqueue. */
@ -635,8 +616,7 @@ static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head,
}
}
static int nocb_gp_toggle_rdp(struct rcu_data *rdp,
bool *wake_state)
static int nocb_gp_toggle_rdp(struct rcu_data *rdp)
{
struct rcu_segcblist *cblist = &rdp->cblist;
unsigned long flags;
@ -650,8 +630,6 @@ static int nocb_gp_toggle_rdp(struct rcu_data *rdp,
* We will handle this rdp until it ever gets de-offloaded.
*/
rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_GP);
if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB))
*wake_state = true;
ret = 1;
} else if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED) &&
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)) {
@ -660,8 +638,6 @@ static int nocb_gp_toggle_rdp(struct rcu_data *rdp,
* We will ignore this rdp until it ever gets re-offloaded.
*/
rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_GP);
if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB))
*wake_state = true;
ret = 0;
} else {
WARN_ON_ONCE(1);
@ -877,16 +853,15 @@ static void nocb_gp_wait(struct rcu_data *my_rdp)
}
if (rdp_toggling) {
bool wake_state = false;
int ret;
ret = nocb_gp_toggle_rdp(rdp_toggling, &wake_state);
ret = nocb_gp_toggle_rdp(rdp_toggling);
if (ret == 1)
list_add_tail(&rdp_toggling->nocb_entry_rdp, &my_rdp->nocb_head_rdp);
else if (ret == 0)
list_del(&rdp_toggling->nocb_entry_rdp);
if (wake_state)
swake_up_one(&rdp_toggling->nocb_state_wq);
swake_up_one(&rdp_toggling->nocb_state_wq);
}
my_rdp->nocb_gp_seq = -1;
@ -913,16 +888,9 @@ static int rcu_nocb_gp_kthread(void *arg)
return 0;
}
static inline bool nocb_cb_can_run(struct rcu_data *rdp)
{
u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_CB;
return rcu_segcblist_test_flags(&rdp->cblist, flags);
}
static inline bool nocb_cb_wait_cond(struct rcu_data *rdp)
{
return nocb_cb_can_run(rdp) && !READ_ONCE(rdp->nocb_cb_sleep);
return !READ_ONCE(rdp->nocb_cb_sleep) || kthread_should_park();
}
/*
@ -934,21 +902,19 @@ static void nocb_cb_wait(struct rcu_data *rdp)
struct rcu_segcblist *cblist = &rdp->cblist;
unsigned long cur_gp_seq;
unsigned long flags;
bool needwake_state = false;
bool needwake_gp = false;
bool can_sleep = true;
struct rcu_node *rnp = rdp->mynode;
do {
swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
nocb_cb_wait_cond(rdp));
if (READ_ONCE(rdp->nocb_cb_sleep)) {
WARN_ON(signal_pending(current));
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
}
} while (!nocb_cb_can_run(rdp));
swait_event_interruptible_exclusive(rdp->nocb_cb_wq,
nocb_cb_wait_cond(rdp));
if (kthread_should_park()) {
kthread_parkme();
} else if (READ_ONCE(rdp->nocb_cb_sleep)) {
WARN_ON(signal_pending(current));
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
}
WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp));
local_irq_save(flags);
rcu_momentary_dyntick_idle();
@ -971,37 +937,16 @@ static void nocb_cb_wait(struct rcu_data *rdp)
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
}
if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) {
if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB)) {
rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_CB);
if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP))
needwake_state = true;
}
if (rcu_segcblist_ready_cbs(cblist))
can_sleep = false;
} else {
/*
* De-offloading. Clear our flag and notify the de-offload worker.
* We won't touch the callbacks and keep sleeping until we ever
* get re-offloaded.
*/
WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB));
rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_CB);
if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP))
needwake_state = true;
}
WRITE_ONCE(rdp->nocb_cb_sleep, can_sleep);
if (rdp->nocb_cb_sleep)
if (!rcu_segcblist_ready_cbs(cblist)) {
WRITE_ONCE(rdp->nocb_cb_sleep, true);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep"));
} else {
WRITE_ONCE(rdp->nocb_cb_sleep, false);
}
rcu_nocb_unlock_irqrestore(rdp, flags);
if (needwake_gp)
rcu_gp_kthread_wake();
if (needwake_state)
swake_up_one(&rdp->nocb_state_wq);
}
/*
@ -1094,17 +1039,8 @@ static int rdp_offload_toggle(struct rcu_data *rdp,
bool wake_gp = false;
rcu_segcblist_offload(cblist, offload);
if (rdp->nocb_cb_sleep)
rdp->nocb_cb_sleep = false;
rcu_nocb_unlock_irqrestore(rdp, flags);
/*
* Ignore former value of nocb_cb_sleep and force wake up as it could
* have been spuriously set to false already.
*/
swake_up_one(&rdp->nocb_cb_wq);
raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags);
// Queue this rdp for add/del to/from the list to iterate on rcuog
WRITE_ONCE(rdp_gp->nocb_toggling_rdp, rdp);
@ -1161,19 +1097,11 @@ static long rcu_nocb_rdp_deoffload(void *arg)
if (wake_gp)
wake_up_process(rdp_gp->nocb_gp_kthread);
/*
* If rcuo[p] kthread spawn failed, directly remove SEGCBLIST_KTHREAD_CB.
* Just wait SEGCBLIST_KTHREAD_GP to be cleared by rcuog.
*/
if (!rdp->nocb_cb_kthread) {
rcu_nocb_lock_irqsave(rdp, flags);
rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB);
rcu_nocb_unlock_irqrestore(rdp, flags);
}
swait_event_exclusive(rdp->nocb_state_wq,
!rcu_segcblist_test_flags(cblist,
SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP));
!rcu_segcblist_test_flags(cblist,
SEGCBLIST_KTHREAD_GP));
if (rdp->nocb_cb_kthread)
kthread_park(rdp->nocb_cb_kthread);
} else {
/*
* No kthread to clear the flags for us or remove the rdp from the nocb list
@ -1181,8 +1109,7 @@ static long rcu_nocb_rdp_deoffload(void *arg)
* but we stick to paranoia in this rare path.
*/
rcu_nocb_lock_irqsave(rdp, flags);
rcu_segcblist_clear_flags(&rdp->cblist,
SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP);
rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP);
rcu_nocb_unlock_irqrestore(rdp, flags);
list_del(&rdp->nocb_entry_rdp);
@ -1282,8 +1209,10 @@ static long rcu_nocb_rdp_offload(void *arg)
wake_gp = rdp_offload_toggle(rdp, true, flags);
if (wake_gp)
wake_up_process(rdp_gp->nocb_gp_kthread);
kthread_unpark(rdp->nocb_cb_kthread);
swait_event_exclusive(rdp->nocb_state_wq,
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) &&
rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP));
/*
@ -1468,7 +1397,7 @@ void __init rcu_init_nohz(void)
if (rcu_segcblist_empty(&rdp->cblist))
rcu_segcblist_init(&rdp->cblist);
rcu_segcblist_offload(&rdp->cblist, true);
rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP);
rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP);
rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_RCU_CORE);
}
rcu_organize_nocb_kthreads();
@ -1526,11 +1455,16 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu)
mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex);
/* Spawn the kthread for this CPU. */
t = kthread_run(rcu_nocb_cb_kthread, rdp,
"rcuo%c/%d", rcu_state.abbr, cpu);
t = kthread_create(rcu_nocb_cb_kthread, rdp,
"rcuo%c/%d", rcu_state.abbr, cpu);
if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__))
goto end;
if (rcu_rdp_is_offloaded(rdp))
wake_up_process(t);
else
kthread_park(t);
if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_CB_BOOST) && kthread_prio)
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
@ -1678,12 +1612,11 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
sprintf(bufw, "%ld", rsclp->gp_seq[RCU_WAIT_TAIL]);
sprintf(bufr, "%ld", rsclp->gp_seq[RCU_NEXT_READY_TAIL]);
pr_info(" CB %d^%d->%d %c%c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n",
pr_info(" CB %d^%d->%d %c%c%c%c%c F%ld L%ld C%d %c%c%s%c%s%c%c q%ld %c CPU %d%s\n",
rdp->cpu, rdp->nocb_gp_rdp->cpu,
nocb_next_rdp ? nocb_next_rdp->cpu : -1,
"kK"[!!rdp->nocb_cb_kthread],
"bB"[raw_spin_is_locked(&rdp->nocb_bypass_lock)],
"cC"[!!atomic_read(&rdp->nocb_lock_contended)],
"lL"[raw_spin_is_locked(&rdp->nocb_lock)],
"sS"[!!rdp->nocb_cb_sleep],
".W"[swait_active(&rdp->nocb_cb_wq)],

View file

@ -28,8 +28,8 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp)
!(lockdep_is_held(&rcu_state.barrier_mutex) ||
(IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) ||
rcu_lockdep_is_held_nocb(rdp) ||
(rdp == this_cpu_ptr(&rcu_data) &&
!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible())) ||
(!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) &&
rdp == this_cpu_ptr(&rcu_data)) ||
rcu_current_is_nocb_kthread(rdp)),
"Unsafe read of RCU_NOCB offloaded state"
);
@ -93,6 +93,16 @@ static void __init rcu_bootup_announce_oddness(void)
pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
if (gp_cleanup_delay)
pr_info("\tRCU debug GP cleanup slowdown %d jiffies.\n", gp_cleanup_delay);
if (nohz_full_patience_delay < 0) {
pr_info("\tRCU NOCB CPU patience negative (%d), resetting to zero.\n", nohz_full_patience_delay);
nohz_full_patience_delay = 0;
} else if (nohz_full_patience_delay > 5 * MSEC_PER_SEC) {
pr_info("\tRCU NOCB CPU patience too large (%d), resetting to %ld.\n", nohz_full_patience_delay, 5 * MSEC_PER_SEC);
nohz_full_patience_delay = 5 * MSEC_PER_SEC;
} else if (nohz_full_patience_delay) {
pr_info("\tRCU NOCB CPU patience set to %d milliseconds.\n", nohz_full_patience_delay);
}
nohz_full_patience_delay_jiffies = msecs_to_jiffies(nohz_full_patience_delay);
if (!use_softirq)
pr_info("\tRCU_SOFTIRQ processing moved to rcuc kthreads.\n");
if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))

View file

@ -501,7 +501,7 @@ static void print_cpu_stall_info(int cpu)
}
delta = rcu_seq_ctr(rdp->mynode->gp_seq - rdp->rcu_iw_gp_seq);
falsepositive = rcu_is_gp_kthread_starving(NULL) &&
rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu));
rcu_dynticks_in_eqs(ct_dynticks_cpu(cpu));
rcuc_starved = rcu_is_rcuc_kthread_starving(rdp, &j);
if (rcuc_starved)
// Print signed value, as negative values indicate a probable bug.
@ -515,7 +515,7 @@ static void print_cpu_stall_info(int cpu)
rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' :
"!."[!delta],
ticks_value, ticks_title,
rcu_dynticks_snap(cpu) & 0xffff,
ct_dynticks_cpu(cpu) & 0xffff,
ct_dynticks_nesting_cpu(cpu), ct_dynticks_nmi_nesting_cpu(cpu),
rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
data_race(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,

View file

@ -4467,12 +4467,7 @@ int task_call_func(struct task_struct *p, task_call_f func, void *arg)
* @cpu: The CPU on which to snapshot the task.
*
* Returns the task_struct pointer of the task "currently" running on
* the specified CPU. If the same task is running on that CPU throughout,
* the return value will be a pointer to that task's task_struct structure.
* If the CPU did any context switches even vaguely concurrently with the
* execution of this function, the return value will be a pointer to the
* task_struct structure of a randomly chosen task that was running on
* that CPU somewhere around the time that this function was executing.
* the specified CPU.
*
* If the specified CPU was offline, the return value is whatever it
* is, perhaps a pointer to the task_struct structure of that CPU's idle
@ -4486,11 +4481,16 @@ int task_call_func(struct task_struct *p, task_call_f func, void *arg)
*/
struct task_struct *cpu_curr_snapshot(int cpu)
{
struct rq *rq = cpu_rq(cpu);
struct task_struct *t;
struct rq_flags rf;
smp_mb(); /* Pairing determined by caller's synchronization design. */
rq_lock_irqsave(rq, &rf);
smp_mb__after_spinlock(); /* Pairing determined by caller's synchronization design. */
t = rcu_dereference(cpu_curr(cpu));
rq_unlock_irqrestore(rq, &rf);
smp_mb(); /* Pairing determined by caller's synchronization design. */
return t;
}

52
tools/rcu/rcu-updaters.sh Executable file
View file

@ -0,0 +1,52 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
#
# Run bpftrace to obtain a histogram of the types of primitives used to
# initiate RCU grace periods. The count associated with rcu_gp_init()
# is the number of normal (non-expedited) grace periods.
#
# Usage: rcu-updaters.sh [ duration-in-seconds ]
#
# Note that not all kernel builds have all of these functions. In those
# that do not, this script will issue a diagnostic for each that is not
# found, but continue normally for the rest of the functions.
duration=${1}
if test -n "${duration}"
then
exitclause='interval:s:'"${duration}"' { exit(); }'
else
echo 'Hit control-C to end sample and print results.'
fi
bpftrace -e 'kprobe:kvfree_call_rcu,
kprobe:call_rcu,
kprobe:call_rcu_tasks,
kprobe:call_rcu_tasks_rude,
kprobe:call_rcu_tasks_trace,
kprobe:call_srcu,
kprobe:rcu_barrier,
kprobe:rcu_barrier_tasks,
kprobe:rcu_barrier_tasks_rude,
kprobe:rcu_barrier_tasks_trace,
kprobe:srcu_barrier,
kprobe:synchronize_rcu,
kprobe:synchronize_rcu_expedited,
kprobe:synchronize_rcu_tasks,
kprobe:synchronize_rcu_tasks_rude,
kprobe:synchronize_rcu_tasks_trace,
kprobe:synchronize_srcu,
kprobe:synchronize_srcu_expedited,
kprobe:get_state_synchronize_rcu,
kprobe:get_state_synchronize_rcu_full,
kprobe:start_poll_synchronize_rcu,
kprobe:start_poll_synchronize_rcu_expedited,
kprobe:start_poll_synchronize_rcu_full,
kprobe:start_poll_synchronize_rcu_expedited_full,
kprobe:poll_state_synchronize_rcu,
kprobe:poll_state_synchronize_rcu_full,
kprobe:cond_synchronize_rcu,
kprobe:cond_synchronize_rcu_full,
kprobe:start_poll_synchronize_srcu,
kprobe:poll_state_synchronize_srcu,
kprobe:rcu_gp_init
{ @counts[func] = count(); } '"${exitclause}"