diff options
-rw-r--r-- | Documentation/RCU/checklist.rst | 25 | ||||
-rw-r--r-- | Documentation/RCU/rcu_dereference.rst | 27 | ||||
-rw-r--r-- | Documentation/RCU/torture.rst | 2 | ||||
-rw-r--r-- | Documentation/memory-barriers.txt | 17 | ||||
-rw-r--r-- | include/linux/rculist.h | 2 | ||||
-rw-r--r-- | include/linux/rcupdate.h | 6 | ||||
-rw-r--r-- | include/linux/srcu.h | 2 | ||||
-rw-r--r-- | kernel/locking/locktorture.c | 18 | ||||
-rw-r--r-- | kernel/rcu/rcutorture.c | 4 | ||||
-rw-r--r-- | kernel/rcu/srcutree.c | 24 | ||||
-rw-r--r-- | kernel/rcu/tasks.h | 4 | ||||
-rwxr-xr-x | tools/testing/selftests/rcutorture/bin/mkinitrd.sh | 5 | ||||
-rw-r--r-- | tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot | 3 |
13 files changed, 87 insertions, 52 deletions
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst index bd3c58c44bef..2d42998a89a6 100644 --- a/Documentation/RCU/checklist.rst +++ b/Documentation/RCU/checklist.rst @@ -241,15 +241,22 @@ over a rather long period of time, but improvements are always welcome! srcu_struct. The rules for the expedited RCU grace-period-wait primitives are the same as for their non-expedited counterparts. - If the updater uses call_rcu_tasks() or synchronize_rcu_tasks(), - then the readers must refrain from executing voluntary - context switches, that is, from blocking. If the updater uses - call_rcu_tasks_trace() or synchronize_rcu_tasks_trace(), then - the corresponding readers must use rcu_read_lock_trace() and - rcu_read_unlock_trace(). If an updater uses call_rcu_tasks_rude() - or synchronize_rcu_tasks_rude(), then the corresponding readers - must use anything that disables preemption, for example, - preempt_disable() and preempt_enable(). + Similarly, it is necessary to correctly use the RCU Tasks flavors: + + a. If the updater uses synchronize_rcu_tasks() or + call_rcu_tasks(), then the readers must refrain from + executing voluntary context switches, that is, from + blocking. + + b. If the updater uses call_rcu_tasks_trace() + or synchronize_rcu_tasks_trace(), then the + corresponding readers must use rcu_read_lock_trace() + and rcu_read_unlock_trace(). + + c. If an updater uses call_rcu_tasks_rude() or + synchronize_rcu_tasks_rude(), then the corresponding + readers must use anything that disables preemption, + for example, preempt_disable() and preempt_enable(). Mixing things up will result in confusion and broken kernels, and has even resulted in an exploitable security issue. Therefore, diff --git a/Documentation/RCU/rcu_dereference.rst b/Documentation/RCU/rcu_dereference.rst index 3b739f6243c8..659d5913784d 100644 --- a/Documentation/RCU/rcu_dereference.rst +++ b/Documentation/RCU/rcu_dereference.rst @@ -3,13 +3,26 @@ PROPER CARE AND FEEDING OF RETURN VALUES FROM rcu_dereference() =============================================================== -Most of the time, you can use values from rcu_dereference() or one of -the similar primitives without worries. Dereferencing (prefix "*"), -field selection ("->"), assignment ("="), address-of ("&"), addition and -subtraction of constants, and casts all work quite naturally and safely. - -It is nevertheless possible to get into trouble with other operations. -Follow these rules to keep your RCU code working properly: +Proper care and feeding of address and data dependencies is critically +important to correct use of things like RCU. To this end, the pointers +returned from the rcu_dereference() family of primitives carry address and +data dependencies. These dependencies extend from the rcu_dereference() +macro's load of the pointer to the later use of that pointer to compute +either the address of a later memory access (representing an address +dependency) or the value written by a later memory access (representing +a data dependency). + +Most of the time, these dependencies are preserved, permitting you to +freely use values from rcu_dereference(). For example, dereferencing +(prefix "*"), field selection ("->"), assignment ("="), address-of +("&"), casts, and addition or subtraction of constants all work quite +naturally and safely. However, because current compilers do not take +either address or data dependencies into account it is still possible +to get into trouble. + +Follow these rules to preserve the address and data dependencies emanating +from your calls to rcu_dereference() and friends, thus keeping your RCU +readers working properly: - You must use one of the rcu_dereference() family of primitives to load an RCU-protected pointer, otherwise CONFIG_PROVE_RCU diff --git a/Documentation/RCU/torture.rst b/Documentation/RCU/torture.rst index b3b6dfa85248..49e7beea6ae1 100644 --- a/Documentation/RCU/torture.rst +++ b/Documentation/RCU/torture.rst @@ -185,7 +185,7 @@ argument. Not all changes require that all scenarios be run. For example, a change to Tree SRCU might run only the SRCU-N and SRCU-P scenarios using the --configs argument to kvm.sh as follows: "--configs 'SRCU-N SRCU-P'". -Large systems can run multiple copies of of the full set of scenarios, +Large systems can run multiple copies of the full set of scenarios, for example, a system with 448 hardware threads can run five instances of the full set concurrently. To make this happen:: diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index d414e145f912..4202174a6262 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -396,10 +396,11 @@ Memory barriers come in four basic varieties: (2) Address-dependency barriers (historical). - [!] This section is marked as HISTORICAL: For more up-to-date - information, including how compiler transformations related to pointer - comparisons can sometimes cause problems, see - Documentation/RCU/rcu_dereference.rst. + [!] This section is marked as HISTORICAL: it covers the long-obsolete + smp_read_barrier_depends() macro, the semantics of which are now + implicit in all marked accesses. For more up-to-date information, + including how compiler transformations can sometimes break address + dependencies, see Documentation/RCU/rcu_dereference.rst. An address-dependency barrier is a weaker form of read barrier. In the case where two loads are performed such that the second depends on the @@ -560,9 +561,11 @@ There are certain things that the Linux kernel memory barriers do not guarantee: ADDRESS-DEPENDENCY BARRIERS (HISTORICAL) ---------------------------------------- -[!] This section is marked as HISTORICAL: For more up-to-date information, -including how compiler transformations related to pointer comparisons can -sometimes cause problems, see Documentation/RCU/rcu_dereference.rst. +[!] This section is marked as HISTORICAL: it covers the long-obsolete +smp_read_barrier_depends() macro, the semantics of which are now implicit +in all marked accesses. For more up-to-date information, including +how compiler transformations can sometimes break address dependencies, +see Documentation/RCU/rcu_dereference.rst. As of v4.15 of the Linux kernel, an smp_mb() was added to READ_ONCE() for DEC Alpha, which means that about the only people who need to pay attention diff --git a/include/linux/rculist.h b/include/linux/rculist.h index d29740be4833..3dc1e58865f7 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -355,7 +355,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, }) /** - * list_next_or_null_rcu - get the first element from a list + * list_next_or_null_rcu - get the next element from a list * @head: the head for the list. * @ptr: the list head to take the next element from. * @type: the type of the struct this is embedded in. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index aa87c82236dd..0746b1b0b663 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -298,6 +298,11 @@ static inline void rcu_lock_acquire(struct lockdep_map *map) lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_); } +static inline void rcu_try_lock_acquire(struct lockdep_map *map) +{ + lock_acquire(map, 0, 1, 2, 0, NULL, _THIS_IP_); +} + static inline void rcu_lock_release(struct lockdep_map *map) { lock_release(map, _THIS_IP_); @@ -312,6 +317,7 @@ int rcu_read_lock_any_held(void); #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ # define rcu_lock_acquire(a) do { } while (0) +# define rcu_try_lock_acquire(a) do { } while (0) # define rcu_lock_release(a) do { } while (0) static inline int rcu_read_lock_held(void) diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 127ef3b2e607..236610e4a8fa 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -229,7 +229,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp srcu_check_nmi_safety(ssp, true); retval = __srcu_read_lock_nmisafe(ssp); - rcu_lock_acquire(&ssp->dep_map); + rcu_try_lock_acquire(&ssp->dep_map); return retval; } diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 69d3cd2cfc3b..415d81e6ce70 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -124,7 +124,7 @@ struct call_rcu_chain { struct rcu_head crc_rh; bool crc_stop; }; -struct call_rcu_chain *call_rcu_chain; +struct call_rcu_chain *call_rcu_chain_list; /* Forward reference. */ static void lock_torture_cleanup(void); @@ -1074,12 +1074,12 @@ static int call_rcu_chain_init(void) if (call_rcu_chains <= 0) return 0; - call_rcu_chain = kcalloc(call_rcu_chains, sizeof(*call_rcu_chain), GFP_KERNEL); - if (!call_rcu_chain) + call_rcu_chain_list = kcalloc(call_rcu_chains, sizeof(*call_rcu_chain_list), GFP_KERNEL); + if (!call_rcu_chain_list) return -ENOMEM; for (i = 0; i < call_rcu_chains; i++) { - call_rcu_chain[i].crc_stop = false; - call_rcu(&call_rcu_chain[i].crc_rh, call_rcu_chain_cb); + call_rcu_chain_list[i].crc_stop = false; + call_rcu(&call_rcu_chain_list[i].crc_rh, call_rcu_chain_cb); } return 0; } @@ -1089,13 +1089,13 @@ static void call_rcu_chain_cleanup(void) { int i; - if (!call_rcu_chain) + if (!call_rcu_chain_list) return; for (i = 0; i < call_rcu_chains; i++) - smp_store_release(&call_rcu_chain[i].crc_stop, true); + smp_store_release(&call_rcu_chain_list[i].crc_stop, true); rcu_barrier(); - kfree(call_rcu_chain); - call_rcu_chain = NULL; + kfree(call_rcu_chain_list); + call_rcu_chain_list = NULL; } static void lock_torture_cleanup(void) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 07a6a183c555..7567ca8e743c 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -3874,7 +3874,9 @@ rcu_torture_init(void) } if (fqs_duration < 0) fqs_duration = 0; - if (fqs_duration) { + if (fqs_holdoff < 0) + fqs_holdoff = 0; + if (fqs_duration && fqs_holdoff) { /* Create the fqs thread */ firsterr = torture_create_kthread(rcu_torture_fqs, NULL, fqs_task); diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 560e99ec5333..0351a4e83529 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -772,20 +772,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe); */ static void srcu_gp_start(struct srcu_struct *ssp) { - struct srcu_data *sdp; int state; - if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER) - sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id()); - else - sdp = this_cpu_ptr(ssp->sda); lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock)); WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)); - spin_lock_rcu_node(sdp); /* Interrupts already disabled. */ - rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); - WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL)); - spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */ WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies); WRITE_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay, 0); smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ @@ -1271,9 +1261,11 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, * period (gp_num = X + 8). So acceleration fails. */ s = rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq); - rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); - WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s) && rhp); + if (rhp) { + rcu_segcblist_advance(&sdp->srcu_cblist, + rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); + WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s)); + } if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { sdp->srcu_gp_seq_needed = s; needgp = true; @@ -1723,6 +1715,11 @@ static void srcu_invoke_callbacks(struct work_struct *work) WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL)); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); + /* + * Although this function is theoretically re-entrant, concurrent + * callbacks invocation is disallowed to avoid executing an SRCU barrier + * too early. + */ if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { spin_unlock_irq_rcu_node(sdp); @@ -1753,6 +1750,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); spin_unlock_irq_rcu_node(sdp); + /* An SRCU barrier or callbacks from previous nesting work pending */ if (more) srcu_schedule_cbs_sdp(sdp, 0); } diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index f54d5782eca0..732ad5b39946 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -975,7 +975,7 @@ static void check_holdout_task(struct task_struct *t, t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) || !rcu_tasks_is_holdout(t) || (IS_ENABLED(CONFIG_NO_HZ_FULL) && - !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) { + !is_idle_task(t) && READ_ONCE(t->rcu_tasks_idle_cpu) >= 0)) { WRITE_ONCE(t->rcu_tasks_holdout, false); list_del_init(&t->rcu_tasks_holdout_list); put_task_struct(t); @@ -993,7 +993,7 @@ static void check_holdout_task(struct task_struct *t, t, ".I"[is_idle_task(t)], "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)], t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout, - t->rcu_tasks_idle_cpu, cpu); + data_race(t->rcu_tasks_idle_cpu), cpu); sched_show_task(t); } diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh index 212c52ca90b5..f3f867129560 100755 --- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -67,7 +67,10 @@ ___EOF___ # build using nolibc on supported archs (smaller executable) and fall # back to regular glibc on other ones. if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \ - "||__ARM_EABI__||__aarch64__||__s390x__||__loongarch__\nyes\n#endif" \ + "||__ARM_EABI__||__aarch64__||(__mips__ && _ABIO32)" \ + "||__powerpc__||(__riscv && __riscv_xlen == 64)" \ + "||__s390x__||__loongarch__" \ + "\nyes\n#endif" \ | ${CROSS_COMPILE}gcc -E -nostdlib -xc - \ | grep -q '^yes'; then # architecture supported by nolibc diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot index d44609937503..979edbf4c820 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot @@ -1 +1,4 @@ nohz_full=2-9 +rcutorture.stall_cpu=14 +rcutorture.stall_cpu_holdoff=90 +rcutorture.fwd_progress=0 |