aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/RCU/lockdep-splat.rst2
-rw-r--r--Documentation/RCU/rculist_nulls.rst38
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt56
-rw-r--r--include/linux/notifier.h11
-rw-r--r--include/linux/rculist_nulls.h4
-rw-r--r--include/linux/rcupdate_trace.h1
-rw-r--r--include/linux/rcupdate_wait.h5
-rw-r--r--include/linux/srcutiny.h4
-rw-r--r--include/linux/torture.h7
-rw-r--r--kernel/locking/locktorture.c12
-rw-r--r--kernel/rcu/rcu.h8
-rw-r--r--kernel/rcu/rcuscale.c83
-rw-r--r--kernel/rcu/rcutorture.c7
-rw-r--r--kernel/rcu/refscale.c37
-rw-r--r--kernel/rcu/tasks.h136
-rw-r--r--kernel/rcu/tree.c16
-rw-r--r--kernel/rcu/tree_nocb.h4
-rw-r--r--kernel/torture.c39
-rwxr-xr-xscripts/checkpatch.pl24
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh8
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TRACE012
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT1
23 files changed, 421 insertions, 86 deletions
diff --git a/Documentation/RCU/lockdep-splat.rst b/Documentation/RCU/lockdep-splat.rst
index 2a5c79db57dc..bcbc4b3c88d7 100644
--- a/Documentation/RCU/lockdep-splat.rst
+++ b/Documentation/RCU/lockdep-splat.rst
@@ -10,7 +10,7 @@ misuses of the RCU API, most notably using one of the rcu_dereference()
family to access an RCU-protected pointer without the proper protection.
When such misuse is detected, an lockdep-RCU splat is emitted.
-The usual cause of a lockdep-RCU slat is someone accessing an
+The usual cause of a lockdep-RCU splat is someone accessing an
RCU-protected data structure without either (1) being in the right kind of
RCU read-side critical section or (2) holding the right update-side lock.
This problem can therefore be serious: it might result in random memory
diff --git a/Documentation/RCU/rculist_nulls.rst b/Documentation/RCU/rculist_nulls.rst
index 9a734bf54b76..21e40fcc08de 100644
--- a/Documentation/RCU/rculist_nulls.rst
+++ b/Documentation/RCU/rculist_nulls.rst
@@ -18,7 +18,16 @@ to solve following problem.
Without 'nulls', a typical RCU linked list managing objects which are
allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use the following
-algorithms:
+algorithms. Following examples assume 'obj' is a pointer to such
+objects, which is having below type.
+
+::
+
+ struct object {
+ struct hlist_node obj_node;
+ atomic_t refcnt;
+ unsigned int key;
+ };
1) Lookup algorithm
-------------------
@@ -26,11 +35,13 @@ algorithms:
::
begin:
- rcu_read_lock()
+ rcu_read_lock();
obj = lockless_lookup(key);
if (obj) {
- if (!try_get_ref(obj)) // might fail for free objects
+ if (!try_get_ref(obj)) { // might fail for free objects
+ rcu_read_unlock();
goto begin;
+ }
/*
* Because a writer could delete object, and a writer could
* reuse these object before the RCU grace period, we
@@ -54,7 +65,7 @@ but a version with an additional memory barrier (smp_rmb())
struct hlist_node *node, *next;
for (pos = rcu_dereference((head)->first);
pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
- ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+ ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
pos = rcu_dereference(next))
if (obj->key == key)
return obj;
@@ -66,10 +77,10 @@ And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
struct hlist_node *node;
for (pos = rcu_dereference((head)->first);
pos && ({ prefetch(pos->next); 1; }) &&
- ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+ ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
pos = rcu_dereference(pos->next))
- if (obj->key == key)
- return obj;
+ if (obj->key == key)
+ return obj;
return NULL;
Quoting Corey Minyard::
@@ -86,7 +97,7 @@ Quoting Corey Minyard::
2) Insertion algorithm
----------------------
-We need to make sure a reader cannot read the new 'obj->obj_next' value
+We need to make sure a reader cannot read the new 'obj->obj_node.next' value
and previous value of 'obj->key'. Otherwise, an item could be deleted
from a chain, and inserted into another chain. If new chain was empty
before the move, 'next' pointer is NULL, and lockless reader can not
@@ -129,8 +140,7 @@ very very fast (before the end of RCU grace period)
Avoiding extra smp_rmb()
========================
-With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
-and extra _release() in insert function.
+With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup().
For example, if we choose to store the slot number as the 'nulls'
end-of-list marker for each slot of the hash table, we can detect
@@ -142,6 +152,9 @@ the beginning. If the object was moved to the same chain,
then the reader doesn't care: It might occasionally
scan the list again without harm.
+Note that using hlist_nulls means the type of 'obj_node' field of
+'struct object' becomes 'struct hlist_nulls_node'.
+
1) lookup algorithm
-------------------
@@ -151,7 +164,7 @@ scan the list again without harm.
head = &table[slot];
begin:
rcu_read_lock();
- hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
+ hlist_nulls_for_each_entry_rcu(obj, node, head, obj_node) {
if (obj->key == key) {
if (!try_get_ref(obj)) { // might fail for free objects
rcu_read_unlock();
@@ -182,6 +195,9 @@ scan the list again without harm.
2) Insert algorithm
-------------------
+Same to the above one, but uses hlist_nulls_add_head_rcu() instead of
+hlist_add_head_rcu().
+
::
/*
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1457995fd41..3a6e30dc82f3 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2918,6 +2918,10 @@
locktorture.torture_type= [KNL]
Specify the locking implementation to test.
+ locktorture.writer_fifo= [KNL]
+ Run the write-side locktorture kthreads at
+ sched_set_fifo() real-time priority.
+
locktorture.verbose= [KNL]
Enable additional printk() statements.
@@ -4928,6 +4932,15 @@
test until boot completes in order to avoid
interference.
+ rcuscale.kfree_by_call_rcu= [KNL]
+ In kernels built with CONFIG_RCU_LAZY=y, test
+ call_rcu() instead of kfree_rcu().
+
+ rcuscale.kfree_mult= [KNL]
+ Instead of allocating an object of size kfree_obj,
+ allocate one of kfree_mult * sizeof(kfree_obj).
+ Defaults to 1.
+
rcuscale.kfree_rcu_test= [KNL]
Set to measure performance of kfree_rcu() flooding.
@@ -4953,6 +4966,12 @@
Number of loops doing rcuscale.kfree_alloc_num number
of allocations and frees.
+ rcuscale.minruntime= [KNL]
+ Set the minimum test run time in seconds. This
+ does not affect the data-collection interval,
+ but instead allows better measurement of things
+ like CPU consumption.
+
rcuscale.nreaders= [KNL]
Set number of RCU readers. The value -1 selects
N, where N is the number of CPUs. A value
@@ -4967,7 +4986,7 @@
the same as for rcuscale.nreaders.
N, where N is the number of CPUs
- rcuscale.perf_type= [KNL]
+ rcuscale.scale_type= [KNL]
Specify the RCU implementation to test.
rcuscale.shutdown= [KNL]
@@ -4983,6 +5002,11 @@
in microseconds. The default of zero says
no holdoff.
+ rcuscale.writer_holdoff_jiffies= [KNL]
+ Additional write-side holdoff between grace
+ periods, but in jiffies. The default of zero
+ says no holdoff.
+
rcutorture.fqs_duration= [KNL]
Set duration of force_quiescent_state bursts
in microseconds.
@@ -5264,6 +5288,13 @@
number avoids disturbing real-time workloads,
but lengthens grace periods.
+ rcupdate.rcu_task_lazy_lim= [KNL]
+ Number of callbacks on a given CPU that will
+ cancel laziness on that CPU. Use -1 to disable
+ cancellation of laziness, but be advised that
+ doing so increases the danger of OOM due to
+ callback flooding.
+
rcupdate.rcu_task_stall_info= [KNL]
Set initial timeout in jiffies for RCU task stall
informational messages, which give some indication
@@ -5293,6 +5324,29 @@
A change in value does not take effect until
the beginning of the next grace period.
+ rcupdate.rcu_tasks_lazy_ms= [KNL]
+ Set timeout in milliseconds RCU Tasks asynchronous
+ callback batching for call_rcu_tasks().
+ A negative value will take the default. A value
+ of zero will disable batching. Batching is
+ always disabled for synchronize_rcu_tasks().
+
+ rcupdate.rcu_tasks_rude_lazy_ms= [KNL]
+ Set timeout in milliseconds RCU Tasks
+ Rude asynchronous callback batching for
+ call_rcu_tasks_rude(). A negative value
+ will take the default. A value of zero will
+ disable batching. Batching is always disabled
+ for synchronize_rcu_tasks_rude().
+
+ rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
+ Set timeout in milliseconds RCU Tasks
+ Trace asynchronous callback batching for
+ call_rcu_tasks_trace(). A negative value
+ will take the default. A value of zero will
+ disable batching. Batching is always disabled
+ for synchronize_rcu_tasks_trace().
+
rcupdate.rcu_self_test= [KNL]
Run the RCU early boot self tests
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 86544707236a..45702bdcbceb 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -73,9 +73,7 @@ struct raw_notifier_head {
struct srcu_notifier_head {
struct mutex mutex;
-#ifdef CONFIG_TREE_SRCU
struct srcu_usage srcuu;
-#endif
struct srcu_struct srcu;
struct notifier_block __rcu *head;
};
@@ -106,7 +104,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
#define RAW_NOTIFIER_INIT(name) { \
.head = NULL }
-#ifdef CONFIG_TREE_SRCU
#define SRCU_NOTIFIER_INIT(name, pcpu) \
{ \
.mutex = __MUTEX_INITIALIZER(name.mutex), \
@@ -114,14 +111,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
.srcuu = __SRCU_USAGE_INIT(name.srcuu), \
.srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
}
-#else
-#define SRCU_NOTIFIER_INIT(name, pcpu) \
- { \
- .mutex = __MUTEX_INITIALIZER(name.mutex), \
- .head = NULL, \
- .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
- }
-#endif
#define ATOMIC_NOTIFIER_HEAD(name) \
struct atomic_notifier_head name = \
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index ba4c00dd8005..89186c499dd4 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -101,7 +101,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
{
struct hlist_nulls_node *first = h->first;
- n->next = first;
+ WRITE_ONCE(n->next, first);
WRITE_ONCE(n->pprev, &h->first);
rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
if (!is_a_nulls(first))
@@ -137,7 +137,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
last = i;
if (last) {
- n->next = last->next;
+ WRITE_ONCE(n->next, last->next);
n->pprev = &last->next;
rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
} else {
diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index 9bc8cbb33340..eda493200663 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -87,6 +87,7 @@ static inline void rcu_read_unlock_trace(void)
void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
void synchronize_rcu_tasks_trace(void);
void rcu_barrier_tasks_trace(void);
+struct task_struct *get_rcu_tasks_trace_gp_kthread(void);
#else
/*
* The BPF JIT forms these addresses even when it doesn't call these
diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
index 699b938358bf..5e0f74f2f8ca 100644
--- a/include/linux/rcupdate_wait.h
+++ b/include/linux/rcupdate_wait.h
@@ -42,6 +42,11 @@ do { \
* call_srcu() function, with this wrapper supplying the pointer to the
* corresponding srcu_struct.
*
+ * Note that call_rcu_hurry() should be used instead of call_rcu()
+ * because in kernels built with CONFIG_RCU_LAZY=y the delay between the
+ * invocation of call_rcu() and that of the corresponding RCU callback
+ * can be multiple seconds.
+ *
* The first argument tells Tiny RCU's _wait_rcu_gp() not to
* bother waiting for RCU. The reason for this is because anywhere
* synchronize_rcu_mult() can be called is automatically already a full
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index ebd72491af99..447133171d95 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -48,6 +48,10 @@ void srcu_drive_gp(struct work_struct *wp);
#define DEFINE_STATIC_SRCU(name) \
static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name, name)
+// Dummy structure for srcu_notifier_head.
+struct srcu_usage { };
+#define __SRCU_USAGE_INIT(name) { }
+
void synchronize_srcu(struct srcu_struct *ssp);
/*
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 7038104463e4..bb466eec01e4 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -108,12 +108,15 @@ bool torture_must_stop(void);
bool torture_must_stop_irq(void);
void torture_kthread_stopping(char *title);
int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
- char *f, struct task_struct **tp);
+ char *f, struct task_struct **tp, void (*cbf)(struct task_struct *tp));
void _torture_stop_kthread(char *m, struct task_struct **tp);
#define torture_create_kthread(n, arg, tp) \
_torture_create_kthread(n, (arg), #n, "Creating " #n " task", \
- "Failed to create " #n, &(tp))
+ "Failed to create " #n, &(tp), NULL)
+#define torture_create_kthread_cb(n, arg, tp, cbf) \
+ _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \
+ "Failed to create " #n, &(tp), cbf)
#define torture_stop_kthread(n, tp) \
_torture_stop_kthread("Stopping " #n " task", &(tp))
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 949d3deae506..270c7f80ce84 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -45,6 +45,7 @@ torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
torture_param(int, rt_boost, 2,
"Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens.");
+torture_param(int, writer_fifo, 0, "Run writers at sched_set_fifo() priority");
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, nested_locks, 0, "Number of nested locks (max = 8)");
/* Going much higher trips "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!" errors */
@@ -809,7 +810,8 @@ static int lock_torture_writer(void *arg)
bool skip_main_lock;
VERBOSE_TOROUT_STRING("lock_torture_writer task started");
- set_user_nice(current, MAX_NICE);
+ if (!rt_task(current))
+ set_user_nice(current, MAX_NICE);
do {
if ((torture_random(&rand) & 0xfffff) == 0)
@@ -1015,8 +1017,7 @@ static void lock_torture_cleanup(void)
if (writer_tasks) {
for (i = 0; i < cxt.nrealwriters_stress; i++)
- torture_stop_kthread(lock_torture_writer,
- writer_tasks[i]);
+ torture_stop_kthread(lock_torture_writer, writer_tasks[i]);
kfree(writer_tasks);
writer_tasks = NULL;
}
@@ -1244,8 +1245,9 @@ static int __init lock_torture_init(void)
goto create_reader;
/* Create writer. */
- firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i],
- writer_tasks[i]);
+ firsterr = torture_create_kthread_cb(lock_torture_writer, &cxt.lwsa[i],
+ writer_tasks[i],
+ writer_fifo ? sched_set_fifo : NULL);
if (torture_init_error(firsterr))
goto unwind;
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 98c1544cf572..5befd8780dcd 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -511,6 +511,14 @@ static inline void show_rcu_tasks_gp_kthreads(void) {}
void rcu_request_urgent_qs_task(struct task_struct *t);
#endif /* #else #ifdef CONFIG_TINY_RCU */
+#ifdef CONFIG_TASKS_RCU
+struct task_struct *get_rcu_tasks_gp_kthread(void);
+#endif // # ifdef CONFIG_TASKS_RCU
+
+#ifdef CONFIG_TASKS_RUDE_RCU
+struct task_struct *get_rcu_tasks_rude_gp_kthread(void);
+#endif // # ifdef CONFIG_TASKS_RUDE_RCU
+
#define RCU_SCHEDULER_INACTIVE 0
#define RCU_SCHEDULER_INIT 1
#define RCU_SCHEDULER_RUNNING 2
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index d1221731c7cf..ffdb30495e3c 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -84,15 +84,17 @@ MODULE_AUTHOR("Paul E. McKenney <[email protected]>");
#endif
torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
-torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader");
+torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer");
torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
+torture_param(int, minruntime, 0, "Minimum run time (s)");
torture_param(int, nreaders, -1, "Number of RCU reader threads");
torture_param(int, nwriters, -1, "Number of RCU updater threads");
torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
"Shutdown at end of scalability tests.");
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
+torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable");
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?");
@@ -139,6 +141,7 @@ struct rcu_scale_ops {
void (*gp_barrier)(void);
void (*sync)(void);
void (*exp_sync)(void);
+ struct task_struct *(*rso_gp_kthread)(void);
const char *name;
};
@@ -295,6 +298,7 @@ static struct rcu_scale_ops tasks_ops = {
.gp_barrier = rcu_barrier_tasks,
.sync = synchronize_rcu_tasks,
.exp_sync = synchronize_rcu_tasks,
+ .rso_gp_kthread = get_rcu_tasks_gp_kthread,
.name = "tasks"
};
@@ -306,6 +310,44 @@ static struct rcu_scale_ops tasks_ops = {
#endif // #else // #ifdef CONFIG_TASKS_RCU
+#ifdef CONFIG_TASKS_RUDE_RCU
+
+/*
+ * Definitions for RCU-tasks-rude scalability testing.
+ */
+
+static int tasks_rude_scale_read_lock(void)
+{
+ return 0;
+}
+
+static void tasks_rude_scale_read_unlock(int idx)
+{
+}
+
+static struct rcu_scale_ops tasks_rude_ops = {
+ .ptype = RCU_TASKS_RUDE_FLAVOR,
+ .init = rcu_sync_scale_init,
+ .readlock = tasks_rude_scale_read_lock,
+ .readunlock = tasks_rude_scale_read_unlock,
+ .get_gp_seq = rcu_no_completed,
+ .gp_diff = rcu_seq_diff,
+ .async = call_rcu_tasks_rude,
+ .gp_barrier = rcu_barrier_tasks_rude,
+ .sync = synchronize_rcu_tasks_rude,
+ .exp_sync = synchronize_rcu_tasks_rude,
+ .rso_gp_kthread = get_rcu_tasks_rude_gp_kthread,
+ .name = "tasks-rude"
+};
+
+#define TASKS_RUDE_OPS &tasks_rude_ops,
+
+#else // #ifdef CONFIG_TASKS_RUDE_RCU
+
+#define TASKS_RUDE_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_RUDE_RCU
+
#ifdef CONFIG_TASKS_TRACE_RCU
/*
@@ -334,6 +376,7 @@ static struct rcu_scale_ops tasks_tracing_ops = {
.gp_barrier = rcu_barrier_tasks_trace,
.sync = synchronize_rcu_tasks_trace,
.exp_sync = synchronize_rcu_tasks_trace,
+ .rso_gp_kthread = get_rcu_tasks_trace_gp_kthread,
.name = "tasks-tracing"
};
@@ -410,10 +453,12 @@ rcu_scale_writer(void *arg)
{
int i = 0;
int i_max;
+ unsigned long jdone;
long me = (long)arg;
struct rcu_head *rhp = NULL;
bool started = false, done = false, alldone = false;
u64 t;
+ DEFINE_TORTURE_RANDOM(tr);
u64 *wdp;
u64 *wdpp = writer_durations[me];
@@ -424,7 +469,7 @@ rcu_scale_writer(void *arg)
sched_set_fifo_low(current);
if (holdoff)
- schedule_timeout_uninterruptible(holdoff * HZ);
+ schedule_timeout_idle(holdoff * HZ);
/*
* Wait until rcu_end_inkernel_boot() is called for normal GP tests
@@ -445,9 +490,12 @@ rcu_scale_writer(void *arg)
}
}
+ jdone = jiffies + minruntime * HZ;
do {
if (writer_holdoff)
udelay(writer_holdoff);
+ if (writer_holdoff_jiffies)
+ schedule_timeout_idle(torture_random(&tr) % writer_holdoff_jiffies + 1);
wdp = &wdpp[i];
*wdp = ktime_get_mono_fast_ns();
if (gp_async) {
@@ -475,7 +523,7 @@ retry:
if (!started &&
atomic_read(&n_rcu_scale_writer_started) >= nrealwriters)
started = true;
- if (!done && i >= MIN_MEAS) {
+ if (!done && i >= MIN_MEAS && time_after(jiffies, jdone)) {
done = true;
sched_set_normal(current, 0);
pr_alert("%s%s rcu_scale_writer %ld has %d measurements\n",
@@ -518,8 +566,8 @@ static void
rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag)
{
pr_alert("%s" SCALE_FLAG
- "--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n",
- scale_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
+ "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown=%d\n",
+ scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown);
}
/*
@@ -556,6 +604,8 @@ static struct task_struct **kfree_reader_tasks;
static int kfree_nrealthreads;
static atomic_t n_kfree_scale_thread_started;
static atomic_t n_kfree_scale_thread_ended;
+static struct task_struct *kthread_tp;
+static u64 kthread_stime;
struct kfree_obj {
char kfree_obj[8];
@@ -701,6 +751,10 @@ kfree_scale_init(void)
unsigned long jif_start;
unsigned long orig_jif;
+ pr_alert("%s" SCALE_FLAG
+ "--- kfree_rcu_test: kfree_mult=%d kfree_by_call_rcu=%d kfree_nthreads=%d kfree_alloc_num=%d kfree_loops=%d kfree_rcu_test_double=%d kfree_rcu_test_single=%d\n",
+ scale_type, kfree_mult, kfree_by_call_rcu, kfree_nthreads, kfree_alloc_num, kfree_loops, kfree_rcu_test_double, kfree_rcu_test_single);
+
// Also, do a quick self-test to ensure laziness is as much as
// expected.
if (kfree_by_call_rcu && !IS_ENABLED(CONFIG_RCU_LAZY)) {
@@ -797,6 +851,18 @@ rcu_scale_cleanup(void)
if (gp_exp && gp_async)
SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
+ // If built-in, just report all of the GP kthread's CPU time.
+ if (IS_BUILTIN(CONFIG_RCU_SCALE_TEST) && !kthread_tp && cur_ops->rso_gp_kthread)
+ kthread_tp = cur_ops->rso_gp_kthread();
+ if (kthread_tp) {
+ u32 ns;
+ u64 us;
+
+ kthread_stime = kthread_tp->stime - kthread_stime;
+ us = div_u64_rem(kthread_stime, 1000, &ns);
+ pr_info("rcu_scale: Grace-period kthread CPU time: %llu.%03u us\n", us, ns);
+ show_rcu_gp_kthreads();
+ }
if (kfree_rcu_test) {
kfree_scale_cleanup();
return;
@@ -885,7 +951,7 @@ rcu_scale_init(void)
long i;
int firsterr = 0;
static struct rcu_scale_ops *scale_ops[] = {
- &rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_TRACING_OPS
+ &rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
};
if (!torture_init_begin(scale_type, verbose))
@@ -910,6 +976,11 @@ rcu_scale_init(void)
if (cur_ops->init)
cur_ops->init();
+ if (cur_ops->rso_gp_kthread) {
+ kthread_tp = cur_ops->rso_gp_kthread();
+ if (kthread_tp)
+ kthread_stime = kthread_tp->stime;
+ }
if (kfree_rcu_test)
return kfree_scale_init();
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 147551c23baf..ade42d6a9d9b 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1581,6 +1581,7 @@ rcu_torture_writer(void *arg)
rcu_access_pointer(rcu_torture_current) !=
&rcu_tortures[i]) {
tracing_off();
+ show_rcu_gp_kthreads();
WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
rcu_ftrace_dump(DUMP_ALL);
}
@@ -1876,7 +1877,7 @@ static int
rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
{
int mask = rcutorture_extend_mask_max();
- unsigned long randmask1 = torture_random(trsp) >> 8;
+ unsigned long randmask1 = torture_random(trsp);
unsigned long randmask2 = randmask1 >> 3;
unsigned long preempts = RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED;
unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ;
@@ -1935,7 +1936,7 @@ rcutorture_loop_extend(int *readstate, struct torture_random_state *trsp,
if (!((mask - 1) & mask))
return rtrsp; /* Current RCU reader not extendable. */
/* Bias towards larger numbers of loops. */
- i = (torture_random(trsp) >> 3);
+ i = torture_random(trsp);
i = ((i | (i >> 3)) & RCUTORTURE_RDR_MAX_LOOPS) + 1;
for (j = 0; j < i; j++) {
mask = rcutorture_extend_mask(*readstate, trsp);
@@ -2136,7 +2137,7 @@ static int rcu_nocb_toggle(void *arg)
toggle_fuzz = NSEC_PER_USEC;
do {
r = torture_random(&rand);
- cpu = (r >> 4) % (maxcpu + 1);
+ cpu = (r >> 1) % (maxcpu + 1);
if (r & 0x1) {
rcu_nocb_cpu_offload(cpu);
atomic_long_inc(&n_nocb_offload);
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index 1970ce5f22d4..91a0fd0d4d9a 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -528,6 +528,38 @@ static struct ref_scale_ops clock_ops = {
.name = "clock"
};
+static void ref_jiffies_section(const int nloops)
+{
+ u64 x = 0;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--)
+ x += jiffies;
+ preempt_enable();
+ stopopts = x;
+}
+
+static void ref_jiffies_delay_section(const int nloops, const int udl, const int ndl)
+{
+ u64 x = 0;
+ int i;
+
+ preempt_disable();
+ for (i = nloops; i >= 0; i--) {
+ x += jiffies;
+ un_delay(udl, ndl);
+ }
+ preempt_enable();
+ stopopts = x;
+}
+
+static struct ref_scale_ops jiffies_ops = {
+ .readsection = ref_jiffies_section,
+ .delaysection = ref_jiffies_delay_section,
+ .name = "jiffies"
+};
+
////////////////////////////////////////////////////////////////////////
//
// Methods leveraging SLAB_TYPESAFE_BY_RCU.
@@ -1047,7 +1079,7 @@ ref_scale_init(void)
int firsterr = 0;
static struct ref_scale_ops *scale_ops[] = {
&rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
- &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
+ &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops, &jiffies_ops,
&typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
};
@@ -1107,12 +1139,11 @@ ref_scale_init(void)
VERBOSE_SCALEOUT("Starting %d reader threads", nreaders);
for (i = 0; i < nreaders; i++) {
+ init_waitqueue_head(&reader_tasks[i].wq);
firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
reader_tasks[i].task);
if (torture_init_error(firsterr))
goto unwind;
-
- init_waitqueue_head(&(reader_tasks[i].wq));
}
// Main Task
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index b770add3f843..8d65f7d576a3 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -25,6 +25,8 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
* @cblist: Callback list.
* @lock: Lock protecting per-CPU callback list.
* @rtp_jiffies: Jiffies counter value for statistics.
+ * @lazy_timer: Timer to unlazify callbacks.
+ * @urgent_gp: Number of additional non-lazy grace periods.
* @rtp_n_lock_retries: Rough lock-contention statistic.
* @rtp_work: Work queue for invoking callbacks.
* @rtp_irq_work: IRQ work queue for deferred wakeups.
@@ -38,6 +40,8 @@ struct rcu_tasks_percpu {
raw_spinlock_t __private lock;
unsigned long rtp_jiffies;
unsigned long rtp_n_lock_retries;
+ struct timer_list lazy_timer;
+ unsigned int urgent_gp;
struct work_struct rtp_work;
struct irq_work rtp_irq_work;
struct rcu_head barrier_q_head;
@@ -51,7 +55,6 @@ struct rcu_tasks_percpu {
* @cbs_wait: RCU wait allowing a new callback to get kthread's attention.
* @cbs_gbl_lock: Lock protecting callback list.
* @tasks_gp_mutex: Mutex protecting grace period, needed during mid-boot dead zone.
- * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
* @gp_func: This flavor's grace-period-wait function.
* @gp_state: Grace period's most recent state transition (debugging).
* @gp_sleep: Per-grace-period sleep to prevent CPU-bound looping.
@@ -61,6 +64,8 @@ struct rcu_tasks_percpu {
* @tasks_gp_seq: Number of grace periods completed since boot.
* @n_ipis: Number of IPIs sent to encourage grace periods to end.
* @n_ipis_fails: Number of IPI-send failures.
+ * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
+ * @lazy_jiffies: Number of jiffies to allow callbacks to be lazy.
* @pregp_func: This flavor's pre-grace-period function (optional).
* @pertask_func: This flavor's per-task scan function (optional).
* @postscan_func: This flavor's post-task scan function (optional).
@@ -92,6 +97,7 @@ struct rcu_tasks {
unsigned long n_ipis;
unsigned long n_ipis_fails;
struct task_struct *kthread_ptr;
+ unsigned long lazy_jiffies;
rcu_tasks_gp_func_t gp_func;
pregp_func_t pregp_func;
pertask_func_t pertask_func;
@@ -127,6 +133,7 @@ static struct rcu_tasks rt_name = \
.gp_func = gp, \
.call_func = call, \
.rtpcpu = &rt_name ## __percpu, \
+ .lazy_jiffies = DIV_ROUND_UP(HZ, 4), \
.name = n, \
.percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS), \
.percpu_enqueue_lim = 1, \
@@ -139,9 +146,7 @@ static struct rcu_tasks rt_name = \
#ifdef CONFIG_TASKS_RCU
/* Track exiting tasks in order to allow them to be waited for. */
DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
-#endif
-#ifdef CONFIG_TASKS_RCU
/* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */
static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
static DEFINE_TIMER(tasks_rcu_exit_srcu_stall_timer, tasks_rcu_exit_srcu_stall);
@@ -171,6 +176,8 @@ static int rcu_task_contend_lim __read_mostly = 100;
module_param(rcu_task_contend_lim, int, 0444);
static int rcu_task_collapse_lim __read_mostly = 10;
module_param(rcu_task_collapse_lim, int, 0444);
+static int rcu_task_lazy_lim __read_mostly = 32;
+module_param(rcu_task_lazy_lim, int, 0444);
/* RCU tasks grace-period state for debugging. */
#define RTGS_INIT 0
@@ -229,7 +236,7 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
#endif /* #ifndef CONFIG_TINY_RCU */
// Initialize per-CPU callback lists for the specified flavor of
-// Tasks RCU.
+// Tasks RCU. Do not enqueue callbacks before this function is invoked.
static void cblist_init_generic(struct rcu_tasks *rtp)
{
int cpu;
@@ -237,7 +244,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
int lim;
int shift;
- raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
if (rcu_task_enqueue_lim < 0) {
rcu_task_enqueue_lim = 1;
rcu_task_cb_adjust = true;
@@ -260,22 +266,48 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
WARN_ON_ONCE(!rtpcp);
if (cpu)
raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock));
- raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
+ local_irq_save(flags); // serialize initialization
if (rcu_segcblist_empty(&rtpcp->cblist))
rcu_segcblist_init(&rtpcp->cblist);
+ local_irq_restore(flags);
INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq);
rtpcp->cpu = cpu;
rtpcp->rtpp = rtp;
if (!rtpcp->rtp_blkd_tasks.next)
INIT_LIST_HEAD(&rtpcp->rtp_blkd_tasks);
- raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
}
- raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n", rtp->name,
data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim), rcu_task_cb_adjust);
}
+// Compute wakeup time for lazy callback timer.
+static unsigned long rcu_tasks_lazy_time(struct rcu_tasks *rtp)
+{
+ return jiffies + rtp->lazy_jiffies;
+}
+
+// Timer handler that unlazifies lazy callbacks.
+static void call_rcu_tasks_generic_timer(struct timer_list *tlp)
+{
+ unsigned long flags;
+ bool needwake = false;
+ struct rcu_tasks *rtp;
+ struct rcu_tasks_percpu *rtpcp = from_timer(rtpcp, tlp, lazy_timer);
+
+ rtp = rtpcp->rtpp;
+ raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
+ if (!rcu_segcblist_empty(&rtpcp->cblist) && rtp->lazy_jiffies) {
+ if (!rtpcp->urgent_gp)
+ rtpcp->urgent_gp = 1;
+ needwake = true;
+ mod_timer(&rtpcp->lazy_timer, rcu_tasks_lazy_time(rtp));
+ }
+ raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+ if (needwake)
+ rcuwait_wake_up(&rtp->cbs_wait);
+}
+
// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic().
static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
{
@@ -292,6 +324,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
{
int chosen_cpu;
unsigned long flags;
+ bool havekthread = smp_load_acquire(&rtp->kthread_ptr);
int ideal_cpu;
unsigned long j;
bool needadjust = false;
@@ -316,12 +349,19 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
READ_ONCE(rtp->percpu_enqueue_lim) != nr_cpu_ids)
needadjust = true; // Defer adjustment to avoid deadlock.
}
- if (!rcu_segcblist_is_enabled(&rtpcp->cblist)) {
- raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
- cblist_init_generic(rtp);
- raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
+ // Queuing callbacks before initialization not yet supported.
+ if (WARN_ON_ONCE(!rcu_segcblist_is_enabled(&rtpcp->cblist)))
+ rcu_segcblist_init(&rtpcp->cblist);
+ needwake = (func == wakeme_after_rcu) ||
+ (rcu_segcblist_n_cbs(&rtpcp->cblist) == rcu_task_lazy_lim);
+ if (havekthread && !needwake && !timer_pending(&rtpcp->lazy_timer)) {
+ if (rtp->lazy_jiffies)
+ mod_timer(&rtpcp->lazy_timer, rcu_tasks_lazy_time(rtp));
+ else
+ needwake = rcu_segcblist_empty(&rtpcp->cblist);
}
- needwake = rcu_segcblist_empty(&rtpcp->cblist);
+ if (needwake)
+ rtpcp->urgent_gp = 3;
rcu_segcblist_enqueue(&rtpcp->cblist, rhp);
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
if (unlikely(needadjust)) {
@@ -415,9 +455,14 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
}
rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
(void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq));
- if (rcu_segcblist_pend_cbs(&rtpcp->cblist))
+ if (rtpcp->urgent_gp > 0 && rcu_segcblist_pend_cbs(&rtpcp->cblist)) {
+ if (rtp->lazy_jiffies)
+ rtpcp->urgent_gp--;
needgpcb |= 0x3;
- if (!rcu_segcblist_empty(&rtpcp->cblist))
+ } else if (rcu_segcblist_empty(&rtpcp->cblist)) {
+ rtpcp->urgent_gp = 0;
+ }
+ if (rcu_segcblist_ready_cbs(&rtpcp->cblist))
needgpcb |= 0x1;
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
}
@@ -525,10 +570,12 @@ static void rcu_tasks_one_gp(struct rcu_tasks *rtp, bool midboot)
if (unlikely(midboot)) {
needgpcb = 0x2;
} else {
+ mutex_unlock(&rtp->tasks_gp_mutex);
set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
rcuwait_wait_event(&rtp->cbs_wait,
(needgpcb = rcu_tasks_need_gpcb(rtp)),
TASK_IDLE);
+ mutex_lock(&rtp->tasks_gp_mutex);
}
if (needgpcb & 0x2) {
@@ -549,11 +596,19 @@ static void rcu_tasks_one_gp(struct rcu_tasks *rtp, bool midboot)
// RCU-tasks kthread that detects grace periods and invokes callbacks.
static int __noreturn rcu_tasks_kthread(void *arg)
{
+ int cpu;
struct rcu_tasks *rtp = arg;
+ for_each_possible_cpu(cpu) {
+ struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
+
+ timer_setup(&rtpcp->lazy_timer, call_rcu_tasks_generic_timer, 0);
+ rtpcp->urgent_gp = 1;
+ }
+
/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
housekeeping_affine(current, HK_TYPE_RCU);
- WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
+ smp_store_release(&rtp->kthread_ptr, current); // Let GPs start!
/*
* Each pass through the following loop makes one check for
@@ -635,16 +690,22 @@ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
{
int cpu;
bool havecbs = false;
+ bool haveurgent = false;
+ bool haveurgentcbs = false;
for_each_possible_cpu(cpu) {
struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
- if (!data_race(rcu_segcblist_empty(&rtpcp->cblist))) {
+ if (!data_race(rcu_segcblist_empty(&rtpcp->cblist)))
havecbs = true;
+ if (data_race(rtpcp->urgent_gp))
+ haveurgent = true;
+ if (!data_race(rcu_segcblist_empty(&rtpcp->cblist)) && data_race(rtpcp->urgent_gp))
+ haveurgentcbs = true;
+ if (havecbs && haveurgent && haveurgentcbs)
break;
- }
}
- pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
+ pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c%c%c l:%lu %s\n",
rtp->kname,
tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
jiffies - data_race(rtp->gp_jiffies),
@@ -652,6 +713,9 @@ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
".k"[!!data_race(rtp->kthread_ptr)],
".C"[havecbs],
+ ".u"[haveurgent],
+ ".U"[haveurgentcbs],
+ rtp->lazy_jiffies,
s);
}
#endif // #ifndef CONFIG_TINY_RCU
@@ -1020,11 +1084,16 @@ void rcu_barrier_tasks(void)
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
+int rcu_tasks_lazy_ms = -1;
+module_param(rcu_tasks_lazy_ms, int, 0444);
+
static int __init rcu_spawn_tasks_kthread(void)
{
cblist_init_generic(&rcu_tasks);
rcu_tasks.gp_sleep = HZ / 10;
rcu_tasks.init_fract = HZ / 10;
+ if (rcu_tasks_lazy_ms >= 0)
+ rcu_tasks.lazy_jiffies = msecs_to_jiffies(rcu_tasks_lazy_ms);
rcu_tasks.pregp_func = rcu_tasks_pregp_step;
rcu_tasks.pertask_func = rcu_tasks_pertask;
rcu_tasks.postscan_func = rcu_tasks_postscan;
@@ -1042,6 +1111,12 @@ void show_rcu_tasks_classic_gp_kthread(void)
EXPORT_SYMBOL_GPL(show_rcu_tasks_classic_gp_kthread);
#endif // !defined(CONFIG_TINY_RCU)
+struct task_struct *get_rcu_tasks_gp_kthread(void)
+{
+ return rcu_tasks.kthread_ptr;
+}
+EXPORT_SYMBOL_GPL(get_rcu_tasks_gp_kthread);
+
/*
* Contribute to protect against tasklist scan blind spot while the
* task is exiting and may be removed from the tasklist. See
@@ -1173,10 +1248,15 @@ void rcu_barrier_tasks_rude(void)
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);
+int rcu_tasks_rude_lazy_ms = -1;
+module_param(rcu_tasks_rude_lazy_ms, int, 0444);
+
static int __init rcu_spawn_tasks_rude_kthread(void)
{
cblist_init_generic(&rcu_tasks_rude);
rcu_tasks_rude.gp_sleep = HZ / 10;
+ if (rcu_tasks_rude_lazy_ms >= 0)
+ rcu_tasks_rude.lazy_jiffies = msecs_to_jiffies(rcu_tasks_rude_lazy_ms);
rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
return 0;
}
@@ -1188,6 +1268,13 @@ void show_rcu_tasks_rude_gp_kthread(void)
}
EXPORT_SYMBOL_GPL(show_rcu_tasks_rude_gp_kthread);
#endif // !defined(CONFIG_TINY_RCU)
+
+struct task_struct *get_rcu_tasks_rude_gp_kthread(void)
+{
+ return rcu_tasks_rude.kthread_ptr;
+}
+EXPORT_SYMBOL_GPL(get_rcu_tasks_rude_gp_kthread);
+
#endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
////////////////////////////////////////////////////////////////////////
@@ -1793,6 +1880,9 @@ void rcu_barrier_tasks_trace(void)
}
EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
+int rcu_tasks_trace_lazy_ms = -1;
+module_param(rcu_tasks_trace_lazy_ms, int, 0444);
+
static int __init rcu_spawn_tasks_trace_kthread(void)
{
cblist_init_generic(&rcu_tasks_trace);
@@ -1807,6 +1897,8 @@ static int __init rcu_spawn_tasks_trace_kthread(void)
if (rcu_tasks_trace.init_fract <= 0)
rcu_tasks_trace.init_fract = 1;
}
+ if (rcu_tasks_trace_lazy_ms >= 0)
+ rcu_tasks_trace.lazy_jiffies = msecs_to_jiffies(rcu_tasks_trace_lazy_ms);
rcu_tasks_trace.pregp_func = rcu_tasks_trace_pregp_step;
rcu_tasks_trace.postscan_func = rcu_tasks_trace_postscan;
rcu_tasks_trace.holdouts_func = check_all_holdout_tasks_trace;
@@ -1830,6 +1922,12 @@ void show_rcu_tasks_trace_gp_kthread(void)
EXPORT_SYMBOL_GPL(show_rcu_tasks_trace_gp_kthread);
#endif // !defined(CONFIG_TINY_RCU)
+struct task_struct *get_rcu_tasks_trace_gp_kthread(void)
+{
+ return rcu_tasks_trace.kthread_ptr;
+}
+EXPORT_SYMBOL_GPL(get_rcu_tasks_trace_gp_kthread);
+
#else /* #ifdef CONFIG_TASKS_TRACE_RCU */
static void exit_tasks_rcu_finish_trace(struct task_struct *t) { }
#endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1449cb69a0e0..cb1caefa8bd0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -632,7 +632,7 @@ void __rcu_irq_enter_check_tick(void)
// prevents self-deadlock. So we can safely recheck under the lock.
// Note that the nohz_full state currently cannot change.
raw_spin_lock_rcu_node(rdp->mynode);
- if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
+ if (READ_ONCE(rdp->rcu_urgent_qs) && !rdp->rcu_forced_tick) {
// A nohz_full CPU is in the kernel and RCU needs a
// quiescent state. Turn on the tick!
WRITE_ONCE(rdp->rcu_forced_tick, true);
@@ -677,12 +677,16 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
}
/**
- * rcu_is_watching - see if RCU thinks that the current CPU is not idle
+ * rcu_is_watching - RCU read-side critical sections permitted on current CPU?
*
- * Return true if RCU is watching the running CPU, which means that this
- * CPU can safely enter RCU read-side critical sections. In other words,
- * if the current CPU is not in its idle loop or is in an interrupt or
- * NMI handler, return true.
+ * Return @true if RCU is watching the running CPU and @false otherwise.
+ * An @true return means that this CPU can safely enter RCU read-side
+ * critical sections.
+ *
+ * Although calls to rcu_is_watching() from most parts of the kernel
+ * will return @true, there are important exceptions. For example, if the
+ * current CPU is deep within its idle loop, in kernel entry/exit code,
+ * or offline, rcu_is_watching() will return @false.
*
* Make notrace because it can be called by the internal functions of
* ftrace, and making this notrace removes unnecessary recursion calls.
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index 43229d2b0c44..5598212d1f27 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -77,9 +77,9 @@ __setup("rcu_nocbs", rcu_nocb_setup);
static int __init parse_rcu_nocb_poll(char *arg)
{
rcu_nocb_poll = true;
- return 0;
+ return 1;
}
-early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
+__setup("rcu_nocb_poll", parse_rcu_nocb_poll);
/*
* Don't bother bypassing ->cblist if the call_rcu() rate is low.
diff --git a/kernel/torture.c b/kernel/torture.c
index 1a0519b836ac..b28b05bbef02 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -37,6 +37,7 @@
#include <linux/ktime.h>
#include <asm/byteorder.h>
#include <linux/torture.h>
+#include <linux/sched/rt.h>
#include "rcu/rcu.h"
MODULE_LICENSE("GPL");
@@ -54,6 +55,9 @@ module_param(verbose_sleep_frequency, int, 0444);
static int verbose_sleep_duration = 1;
module_param(verbose_sleep_duration, int, 0444);
+static int random_shuffle;
+module_param(random_shuffle, int, 0444);
+
static char *torture_type;
static int verbose;
@@ -88,8 +92,8 @@ int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_s
ktime_t hto = baset_ns;
if (trsp)
- hto += (torture_random(trsp) >> 3) % fuzzt_ns;
- set_current_state(TASK_UNINTERRUPTIBLE);
+ hto += torture_random(trsp) % fuzzt_ns;
+ set_current_state(TASK_IDLE);
return schedule_hrtimeout(&hto, HRTIMER_MODE_REL);
}
EXPORT_SYMBOL_GPL(torture_hrtimeout_ns);
@@ -350,22 +354,22 @@ torture_onoff(void *arg)
if (onoff_holdoff > 0) {
VERBOSE_TOROUT_STRING("torture_onoff begin holdoff");
- schedule_timeout_interruptible(onoff_holdoff);
+ torture_hrtimeout_jiffies(onoff_holdoff, &rand);
VERBOSE_TOROUT_STRING("torture_onoff end holdoff");
}
while (!torture_must_stop()) {
if (disable_onoff_at_boot && !rcu_inkernel_boot_has_ended()) {
- schedule_timeout_interruptible(HZ / 10);
+ torture_hrtimeout_jiffies(HZ / 10, &rand);
continue;
}
- cpu = (torture_random(&rand) >> 4) % (maxcpu + 1);
+ cpu = torture_random(&rand) % (maxcpu + 1);
if (!torture_offline(cpu,
&n_offline_attempts, &n_offline_successes,
&sum_offline, &min_offline, &max_offline))
torture_online(cpu,
&n_online_attempts, &n_online_successes,
&sum_online, &min_online, &max_online);
- schedule_timeout_interruptible(onoff_interval);
+ torture_hrtimeout_jiffies(onoff_interval, &rand);
}
stop:
@@ -518,6 +522,7 @@ static void torture_shuffle_task_unregister_all(void)
*/
static void torture_shuffle_tasks(void)
{
+ DEFINE_TORTURE_RANDOM(rand);
struct shuffle_task *stp;
cpumask_setall(shuffle_tmp_mask);
@@ -537,8 +542,10 @@ static void torture_shuffle_tasks(void)
cpumask_clear_cpu(shuffle_idle_cpu, shuffle_tmp_mask);
mutex_lock(&shuffle_task_mutex);
- list_for_each_entry(stp, &shuffle_task_list, st_l)
- set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask);
+ list_for_each_entry(stp, &shuffle_task_list, st_l) {
+ if (!random_shuffle || torture_random(&rand) & 0x1)
+ set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask);
+ }
mutex_unlock(&shuffle_task_mutex);
cpus_read_unlock();
@@ -550,9 +557,11 @@ static void torture_shuffle_tasks(void)
*/
static int torture_shuffle(void *arg)
{
+ DEFINE_TORTURE_RANDOM(rand);
+
VERBOSE_TOROUT_STRING("torture_shuffle task started");
do {
- schedule_timeout_interruptible(shuffle_interval);
+ torture_hrtimeout_jiffies(shuffle_interval, &rand);
torture_shuffle_tasks();
torture_shutdown_absorb("torture_shuffle");
} while (!torture_must_stop());
@@ -728,12 +737,12 @@ bool stutter_wait(const char *title)
cond_resched_tasks_rcu_qs();
spt = READ_ONCE(stutter_pause_test);
for (; spt; spt = READ_ONCE(stutter_pause_test)) {
- if (!ret) {
+ if (!ret && !rt_task(current)) {
sched_set_normal(current, MAX_NICE);
ret = true;
}
if (spt == 1) {
- schedule_timeout_interruptible(1);
+ torture_hrtimeout_jiffies(1, NULL);
} else if (spt == 2) {
while (READ_ONCE(stutter_pause_test)) {
if (!(i++ & 0xffff))
@@ -741,7 +750,7 @@ bool stutter_wait(const char *title)
cond_resched();
}
} else {
- schedule_timeout_interruptible(round_jiffies_relative(HZ));
+ torture_hrtimeout_jiffies(round_jiffies_relative(HZ), NULL);
}
torture_shutdown_absorb(title);
}
@@ -926,7 +935,7 @@ EXPORT_SYMBOL_GPL(torture_kthread_stopping);
* it starts, you will need to open-code your own.
*/
int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
- char *f, struct task_struct **tp)
+ char *f, struct task_struct **tp, void (*cbf)(struct task_struct *tp))
{
int ret = 0;
@@ -938,6 +947,10 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
*tp = NULL;
return ret;
}
+
+ if (cbf)
+ cbf(*tp);
+
wake_up_process(*tp); // Process is sleeping, so ordering provided.
torture_shuffle_task_register(*tp);
return ret;
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 880fde13d9b8..a9841148cde2 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -7457,6 +7457,30 @@ sub process {
}
}
+# Complain about RCU Tasks Trace used outside of BPF (and of course, RCU).
+ our $rcu_trace_funcs = qr{(?x:
+ rcu_read_lock_trace |
+ rcu_read_lock_trace_held |
+ rcu_read_unlock_trace |
+ call_rcu_tasks_trace |
+ synchronize_rcu_tasks_trace |
+ rcu_barrier_tasks_trace |
+ rcu_request_urgent_qs_task
+ )};
+ our $rcu_trace_paths = qr{(?x:
+ kernel/bpf/ |
+ include/linux/bpf |
+ net/bpf/ |
+ kernel/rcu/ |
+ include/linux/rcu
+ )};
+ if ($line =~ /\b($rcu_trace_funcs)\s*\(/) {
+ if ($realfile !~ m{^$rcu_trace_paths}) {
+ WARN("RCU_TASKS_TRACE",
+ "use of RCU tasks trace is incorrect outside BPF or core RCU code\n" . $herecurr);
+ }
+ }
+
# check for lockdep_set_novalidate_class
if ($line =~ /^.\s*lockdep_set_novalidate_class\s*\(/ ||
$line =~ /__lockdep_no_validate__\s*\)/ ) {
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
index b582113178ac..f683e424ddd5 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
@@ -40,6 +40,10 @@ awk '
sum += $5 / 1000.;
}
+/rcu_scale: Grace-period kthread CPU time/ {
+ cputime = $6;
+}
+
END {
newNR = asort(gptimes);
if (newNR <= 0) {
@@ -78,6 +82,8 @@ END {
print "90th percentile grace-period duration: " gptimes[pct90];
print "99th percentile grace-period duration: " gptimes[pct99];
print "Maximum grace-period duration: " gptimes[newNR];
- print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
+ if (cputime != "")
+ cpustr = " CPU: " cputime;
+ print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches cpustr;
print "Computed from rcuscale printk output.";
}'
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
index 6a00157bee5b..b1ffd7c67604 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
@@ -2,5 +2,7 @@ CONFIG_RCU_SCALE_TEST=y
CONFIG_PRINTK_TIME=y
CONFIG_FORCE_TASKS_RCU=y
#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_RUDE_RCU=y
+#CHECK#CONFIG_TASKS_RUDE_RCU=y
CONFIG_FORCE_TASKS_TRACE_RCU=y
#CHECK#CONFIG_TASKS_TRACE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
index 227aba7783af..0059592c7408 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
@@ -2,6 +2,8 @@ CONFIG_SMP=y
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_TREE_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
index ef2b501a6971..67f9d2998afd 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
@@ -2,6 +2,7 @@ CONFIG_SMP=y
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_PREEMPT_RCU=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y