diff options
-rw-r--r-- | Documentation/RCU/Design/Data-Structures/Data-Structures.rst | 2 | ||||
-rw-r--r-- | Documentation/RCU/Design/Requirements/Requirements.rst | 4 | ||||
-rw-r--r-- | Documentation/RCU/whatisRCU.rst | 2 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/page_track.c | 6 | ||||
-rw-r--r-- | include/linux/rculist.h | 48 | ||||
-rw-r--r-- | include/linux/rcupdate.h | 12 | ||||
-rw-r--r-- | include/linux/rcutiny.h | 1 | ||||
-rw-r--r-- | include/linux/rcutree.h | 1 | ||||
-rw-r--r-- | include/trace/events/rcu.h | 54 | ||||
-rw-r--r-- | kernel/entry/common.c | 2 | ||||
-rw-r--r-- | kernel/rcu/rcu_segcblist.c | 10 | ||||
-rw-r--r-- | kernel/rcu/srcutree.c | 13 | ||||
-rw-r--r-- | kernel/rcu/tree.c | 92 | ||||
-rw-r--r-- | kernel/rcu/tree.h | 1 | ||||
-rw-r--r-- | kernel/rcu/tree_exp.h | 6 | ||||
-rw-r--r-- | kernel/rcu/tree_plugin.h | 8 | ||||
-rw-r--r-- | kernel/rcu/tree_stall.h | 8 | ||||
-rw-r--r-- | kernel/rcu/update.c | 13 |
18 files changed, 158 insertions, 125 deletions
diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst index 4a48e20a46f2..f4efd6897b09 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.rst +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.rst @@ -963,7 +963,7 @@ exit and perhaps also vice versa. Therefore, whenever the ``->dynticks_nesting`` field is incremented up from zero, the ``->dynticks_nmi_nesting`` field is set to a large positive number, and whenever the ``->dynticks_nesting`` field is decremented down to zero, -the the ``->dynticks_nmi_nesting`` field is set to zero. Assuming that +the ``->dynticks_nmi_nesting`` field is set to zero. Assuming that the number of misnested interrupts is not sufficient to overflow the counter, this approach corrects the ``->dynticks_nmi_nesting`` field every time the corresponding CPU enters the idle loop from process diff --git a/Documentation/RCU/Design/Requirements/Requirements.rst b/Documentation/RCU/Design/Requirements/Requirements.rst index 8f41ad0aa753..1ae79a10a8de 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.rst +++ b/Documentation/RCU/Design/Requirements/Requirements.rst @@ -2162,7 +2162,7 @@ scheduling-clock interrupt be enabled when RCU needs it to be: this sort of thing. #. If a CPU is in a portion of the kernel that is absolutely positively no-joking guaranteed to never execute any RCU read-side critical - sections, and RCU believes this CPU to to be idle, no problem. This + sections, and RCU believes this CPU to be idle, no problem. This sort of thing is used by some architectures for light-weight exception handlers, which can then avoid the overhead of ``rcu_irq_enter()`` and ``rcu_irq_exit()`` at exception entry and @@ -2431,7 +2431,7 @@ However, there are legitimate preemptible-RCU implementations that do not have this property, given that any point in the code outside of an RCU read-side critical section can be a quiescent state. Therefore, *RCU-sched* was created, which follows “classic” RCU in that an -RCU-sched grace period waits for for pre-existing interrupt and NMI +RCU-sched grace period waits for pre-existing interrupt and NMI handlers. In kernels built with ``CONFIG_PREEMPT=n``, the RCU and RCU-sched APIs have identical implementations, while kernels built with ``CONFIG_PREEMPT=y`` provide a separate implementation for each. diff --git a/Documentation/RCU/whatisRCU.rst b/Documentation/RCU/whatisRCU.rst index c7f147b8034f..fb3ff76c3e73 100644 --- a/Documentation/RCU/whatisRCU.rst +++ b/Documentation/RCU/whatisRCU.rst @@ -360,7 +360,7 @@ order to amortize their overhead over many uses of the corresponding APIs. There are at least three flavors of RCU usage in the Linux kernel. The diagram above shows the most common one. On the updater side, the rcu_assign_pointer(), -sychronize_rcu() and call_rcu() primitives used are the same for all three +synchronize_rcu() and call_rcu() primitives used are the same for all three flavors. However for protection (on the reader side), the primitives used vary depending on the flavor: diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index a84a141a2ad2..8443a675715b 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -229,7 +229,8 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, return; idx = srcu_read_lock(&head->track_srcu); - hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) + hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, + srcu_read_lock_held(&head->track_srcu)) if (n->track_write) n->track_write(vcpu, gpa, new, bytes, n); srcu_read_unlock(&head->track_srcu, idx); @@ -254,7 +255,8 @@ void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot) return; idx = srcu_read_lock(&head->track_srcu); - hlist_for_each_entry_rcu(n, &head->track_notifier_list, node) + hlist_for_each_entry_srcu(n, &head->track_notifier_list, node, + srcu_read_lock_held(&head->track_srcu)) if (n->track_flush_slot) n->track_flush_slot(kvm, slot, n); srcu_read_unlock(&head->track_srcu, idx); diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 7a6fc9956510..f8633d37e358 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -63,9 +63,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) RCU_LOCKDEP_WARN(!(cond) && !rcu_read_lock_any_held(), \ "RCU-list traversed in non-reader section!"); \ }) + +#define __list_check_srcu(cond) \ + ({ \ + RCU_LOCKDEP_WARN(!(cond), \ + "RCU-list traversed without holding the required lock!");\ + }) #else #define __list_check_rcu(dummy, cond, extra...) \ ({ check_arg_count_one(extra); }) + +#define __list_check_srcu(cond) ({ }) #endif /* @@ -386,6 +394,25 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** + * list_for_each_entry_srcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * @cond: lockdep expression for the lock required to traverse the list. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by srcu_read_lock(). + * The lockdep expression srcu_read_lock_held() can be passed as the + * cond argument from read side. + */ +#define list_for_each_entry_srcu(pos, head, member, cond) \ + for (__list_check_srcu(cond), \ + pos = list_entry_rcu((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) + +/** * list_entry_lockless - get the struct for this entry * @ptr: the &struct list_head pointer. * @type: the type of the struct this is embedded in. @@ -684,6 +711,27 @@ static inline void hlist_add_behind_rcu(struct hlist_node *n, &(pos)->member)), typeof(*(pos)), member)) /** + * hlist_for_each_entry_srcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * @cond: lockdep expression for the lock required to traverse the list. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by srcu_read_lock(). + * The lockdep expression srcu_read_lock_held() can be passed as the + * cond argument from read side. + */ +#define hlist_for_each_entry_srcu(pos, head, member, cond) \ + for (__list_check_srcu(cond), \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) + +/** * hlist_for_each_entry_rcu_notrace - iterate over rcu list of given type (for tracing) * @pos: the type * to use as a loop cursor. * @head: the head for your list. diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d15d46db61f7..b47d6b66665e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -709,8 +709,8 @@ static inline void rcu_read_lock_bh(void) "rcu_read_lock_bh() used illegally while idle"); } -/* - * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section +/** + * rcu_read_unlock_bh() - marks the end of a softirq-only RCU critical section * * See rcu_read_lock_bh() for more information. */ @@ -751,10 +751,10 @@ static inline notrace void rcu_read_lock_sched_notrace(void) __acquire(RCU_SCHED); } -/* - * rcu_read_unlock_sched - marks the end of a RCU-classic critical section +/** + * rcu_read_unlock_sched() - marks the end of a RCU-classic critical section * - * See rcu_read_lock_sched for more information. + * See rcu_read_lock_sched() for more information. */ static inline void rcu_read_unlock_sched(void) { @@ -945,7 +945,7 @@ static inline void rcu_head_init(struct rcu_head *rhp) } /** - * rcu_head_after_call_rcu - Has this rcu_head been passed to call_rcu()? + * rcu_head_after_call_rcu() - Has this rcu_head been passed to call_rcu()? * @rhp: The rcu_head structure to test. * @f: The function passed to call_rcu() along with @rhp. * diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 5cc9637cac16..7c1ecdb356d8 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -103,7 +103,6 @@ static inline void rcu_scheduler_starting(void) { } static inline void rcu_end_inkernel_boot(void) { } static inline bool rcu_inkernel_boot_has_ended(void) { return true; } static inline bool rcu_is_watching(void) { return true; } -static inline bool __rcu_is_watching(void) { return true; } static inline void rcu_momentary_dyntick_idle(void) { } static inline void kfree_rcu_scheduler_running(void) { } static inline bool rcu_gp_might_be_stalled(void) { return false; } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d2f4064ebd1d..59eb5cd567d7 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -64,7 +64,6 @@ extern int rcu_scheduler_active __read_mostly; void rcu_end_inkernel_boot(void); bool rcu_inkernel_boot_has_ended(void); bool rcu_is_watching(void); -bool __rcu_is_watching(void); #ifndef CONFIG_PREEMPTION void rcu_all_qs(void); #endif diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index ced71237b7e4..155b5cb43cfd 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -74,17 +74,17 @@ TRACE_EVENT_RCU(rcu_grace_period, TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gp_seq) + __field(long, gp_seq) __field(const char *, gpevent) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->gp_seq = gp_seq; + __entry->gp_seq = (long)gp_seq; __entry->gpevent = gpevent; ), - TP_printk("%s %lu %s", + TP_printk("%s %ld %s", __entry->rcuname, __entry->gp_seq, __entry->gpevent) ); @@ -114,8 +114,8 @@ TRACE_EVENT_RCU(rcu_future_grace_period, TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gp_seq) - __field(unsigned long, gp_seq_req) + __field(long, gp_seq) + __field(long, gp_seq_req) __field(u8, level) __field(int, grplo) __field(int, grphi) @@ -124,16 +124,16 @@ TRACE_EVENT_RCU(rcu_future_grace_period, TP_fast_assign( __entry->rcuname = rcuname; - __entry->gp_seq = gp_seq; - __entry->gp_seq_req = gp_seq_req; + __entry->gp_seq = (long)gp_seq; + __entry->gp_seq_req = (long)gp_seq_req; __entry->level = level; __entry->grplo = grplo; __entry->grphi = grphi; __entry->gpevent = gpevent; ), - TP_printk("%s %lu %lu %u %d %d %s", - __entry->rcuname, __entry->gp_seq, __entry->gp_seq_req, __entry->level, + TP_printk("%s %ld %ld %u %d %d %s", + __entry->rcuname, (long)__entry->gp_seq, (long)__entry->gp_seq_req, __entry->level, __entry->grplo, __entry->grphi, __entry->gpevent) ); @@ -153,7 +153,7 @@ TRACE_EVENT_RCU(rcu_grace_period_init, TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gp_seq) + __field(long, gp_seq) __field(u8, level) __field(int, grplo) __field(int, grphi) @@ -162,14 +162,14 @@ TRACE_EVENT_RCU(rcu_grace_period_init, TP_fast_assign( __entry->rcuname = rcuname; - __entry->gp_seq = gp_seq; + __entry->gp_seq = (long)gp_seq; __entry->level = level; __entry->grplo = grplo; __entry->grphi = grphi; __entry->qsmask = qsmask; ), - TP_printk("%s %lu %u %d %d %lx", + TP_printk("%s %ld %u %d %d %lx", __entry->rcuname, __entry->gp_seq, __entry->level, __entry->grplo, __entry->grphi, __entry->qsmask) ); @@ -197,17 +197,17 @@ TRACE_EVENT_RCU(rcu_exp_grace_period, TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gpseq) + __field(long, gpseq) __field(const char *, gpevent) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->gpseq = gpseq; + __entry->gpseq = (long)gpseq; __entry->gpevent = gpevent; ), - TP_printk("%s %lu %s", + TP_printk("%s %ld %s", __entry->rcuname, __entry->gpseq, __entry->gpevent) ); @@ -316,17 +316,17 @@ TRACE_EVENT_RCU(rcu_preempt_task, TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gp_seq) + __field(long, gp_seq) __field(int, pid) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->gp_seq = gp_seq; + __entry->gp_seq = (long)gp_seq; __entry->pid = pid; ), - TP_printk("%s %lu %d", + TP_printk("%s %ld %d", __entry->rcuname, __entry->gp_seq, __entry->pid) ); @@ -343,17 +343,17 @@ TRACE_EVENT_RCU(rcu_unlock_preempted_task, TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gp_seq) + __field(long, gp_seq) __field(int, pid) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->gp_seq = gp_seq; + __entry->gp_seq = (long)gp_seq; __entry->pid = pid; ), - TP_printk("%s %lu %d", __entry->rcuname, __entry->gp_seq, __entry->pid) + TP_printk("%s %ld %d", __entry->rcuname, __entry->gp_seq, __entry->pid) ); /* @@ -374,7 +374,7 @@ TRACE_EVENT_RCU(rcu_quiescent_state_report, TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gp_seq) + __field(long, gp_seq) __field(unsigned long, mask) __field(unsigned long, qsmask) __field(u8, level) @@ -385,7 +385,7 @@ TRACE_EVENT_RCU(rcu_quiescent_state_report, TP_fast_assign( __entry->rcuname = rcuname; - __entry->gp_seq = gp_seq; + __entry->gp_seq = (long)gp_seq; __entry->mask = mask; __entry->qsmask = qsmask; __entry->level = level; @@ -394,7 +394,7 @@ TRACE_EVENT_RCU(rcu_quiescent_state_report, __entry->gp_tasks = gp_tasks; ), - TP_printk("%s %lu %lx>%lx %u %d %d %u", + TP_printk("%s %ld %lx>%lx %u %d %d %u", __entry->rcuname, __entry->gp_seq, __entry->mask, __entry->qsmask, __entry->level, __entry->grplo, __entry->grphi, __entry->gp_tasks) @@ -415,19 +415,19 @@ TRACE_EVENT_RCU(rcu_fqs, TP_STRUCT__entry( __field(const char *, rcuname) - __field(unsigned long, gp_seq) + __field(long, gp_seq) __field(int, cpu) __field(const char *, qsevent) ), TP_fast_assign( __entry->rcuname = rcuname; - __entry->gp_seq = gp_seq; + __entry->gp_seq = (long)gp_seq; __entry->cpu = cpu; __entry->qsevent = qsevent; ), - TP_printk("%s %lu %d %s", + TP_printk("%s %ld %d %s", __entry->rcuname, __entry->gp_seq, __entry->cpu, __entry->qsevent) ); diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 9852e0d62d95..ad794a10fa80 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -278,7 +278,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs) * terminate a grace period, if and only if the timer interrupt is * not nested into another interrupt. * - * Checking for __rcu_is_watching() here would prevent the nesting + * Checking for rcu_is_watching() here would prevent the nesting * interrupt to invoke rcu_irq_enter(). If that nested interrupt is * the tick then rcu_flavor_sched_clock_irq() would wrongfully * assume that it is the first interupt and eventually claim diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c index 9a0f66133b4b..2d2a6b6b9dfb 100644 --- a/kernel/rcu/rcu_segcblist.c +++ b/kernel/rcu/rcu_segcblist.c @@ -475,8 +475,16 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq) * Also advance to the oldest segment of callbacks whose * ->gp_seq[] completion is at or after that passed in via "seq", * skipping any empty segments. + * + * Note that segment "i" (and any lower-numbered segments + * containing older callbacks) will be unaffected, and their + * grace-period numbers remain unchanged. For example, if i == + * WAIT_TAIL, then neither WAIT_TAIL nor DONE_TAIL will be touched. + * Instead, the CBs in NEXT_TAIL will be merged with those in + * NEXT_READY_TAIL and the grace-period number of NEXT_READY_TAIL + * would be updated. NEXT_TAIL would then be empty. */ - if (++i >= RCU_NEXT_TAIL) + if (rcu_segcblist_restempty(rsclp, i) || ++i >= RCU_NEXT_TAIL) return false; /* diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index c100acf332ed..c13348ee80a5 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -29,19 +29,6 @@ #include "rcu.h" #include "rcu_segcblist.h" -#ifndef data_race -#define data_race(expr) \ - ({ \ - expr; \ - }) -#endif -#ifndef ASSERT_EXCLUSIVE_WRITER -#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0) -#endif -#ifndef ASSERT_EXCLUSIVE_ACCESS -#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0) -#endif - /* Holdoff in nanoseconds for auto-expediting. */ #define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000) static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF; diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8ce77d9ac716..92450642c4d8 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -70,19 +70,6 @@ #endif #define MODULE_PARAM_PREFIX "rcutree." -#ifndef data_race -#define data_race(expr) \ - ({ \ - expr; \ - }) -#endif -#ifndef ASSERT_EXCLUSIVE_WRITER -#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0) -#endif -#ifndef ASSERT_EXCLUSIVE_ACCESS -#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0) -#endif - /* Data structures. */ /* @@ -1090,11 +1077,6 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp) } } -noinstr bool __rcu_is_watching(void) -{ - return !rcu_dynticks_curr_cpu_in_eqs(); -} - /** * rcu_is_watching - see if RCU thinks that the current CPU is not idle * @@ -1227,13 +1209,28 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) return 1; } - /* If waiting too long on an offline CPU, complain. */ - if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp)) && - time_after(jiffies, rcu_state.gp_start + HZ)) { + /* + * Complain if a CPU that is considered to be offline from RCU's + * perspective has not yet reported a quiescent state. After all, + * the offline CPU should have reported a quiescent state during + * the CPU-offline process, or, failing that, by rcu_gp_init() + * if it ran concurrently with either the CPU going offline or the + * last task on a leaf rcu_node structure exiting its RCU read-side + * critical section while all CPUs corresponding to that structure + * are offline. This added warning detects bugs in any of these + * code paths. + * + * The rcu_node structure's ->lock is held here, which excludes + * the relevant portions the CPU-hotplug code, the grace-period + * initialization code, and the rcu_read_unlock() code paths. + * + * For more detail, please refer to the "Hotplug CPU" section + * of RCU's Requirements documentation. + */ + if (WARN_ON_ONCE(!(rdp->grpmask & rcu_rnp_online_cpus(rnp)))) { bool onl; struct rcu_node *rnp1; - WARN_ON(1); /* Offline CPUs are supposed to report QS! */ pr_info("%s: grp: %d-%d level: %d ->gp_seq %ld ->completedqs %ld\n", __func__, rnp->grplo, rnp->grphi, rnp->level, (long)rnp->gp_seq, (long)rnp->completedqs); @@ -1496,9 +1493,10 @@ static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp) /* Trace depending on how much we were able to accelerate. */ if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL)) - trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("AccWaitCB")); + trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccWaitCB")); else - trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("AccReadyCB")); + trace_rcu_grace_period(rcu_state.name, gp_seq_req, TPS("AccReadyCB")); + return ret; } @@ -1718,10 +1716,13 @@ static bool rcu_gp_init(void) raw_spin_unlock_irq_rcu_node(rnp); /* - * Apply per-leaf buffered online and offline operations to the - * rcu_node tree. Note that this new grace period need not wait - * for subsequent online CPUs, and that quiescent-state forcing - * will handle subsequent offline CPUs. + * Apply per-leaf buffered online and offline operations to + * the rcu_node tree. Note that this new grace period need not + * wait for subsequent online CPUs, and that RCU hooks in the CPU + * offlining path, when combined with checks in this function, + * will handle CPUs that are currently going offline or that will + * go offline later. Please also refer to "Hotplug CPU" section + * of RCU's Requirements documentation. */ rcu_state.gp_state = RCU_GP_ONOFF; rcu_for_each_leaf_node(rnp) { @@ -1896,7 +1897,7 @@ static void rcu_gp_fqs_loop(void) break; /* If time for quiescent-state forcing, do it. */ if (!time_after(rcu_state.jiffies_force_qs, jiffies) || - (gf & RCU_GP_FLAG_FQS)) { + (gf & (RCU_GP_FLAG_FQS | RCU_GP_FLAG_OVLD))) { trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("fqsstart")); rcu_gp_fqs(first_gp_fqs); @@ -2374,6 +2375,7 @@ int rcutree_dead_cpu(unsigned int cpu) */ static void rcu_do_batch(struct rcu_data *rdp) { + int div; unsigned long flags; const bool offloaded = IS_ENABLED(CONFIG_RCU_NOCB_CPU) && rcu_segcblist_is_offloaded(&rdp->cblist); @@ -2402,9 +2404,15 @@ static void rcu_do_batch(struct rcu_data *rdp) rcu_nocb_lock(rdp); WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); pending = rcu_segcblist_n_cbs(&rdp->cblist); - bl = max(rdp->blimit, pending >> rcu_divisor); - if (unlikely(bl > 100)) - tlimit = local_clock() + rcu_resched_ns; + div = READ_ONCE(rcu_divisor); + div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? sizeof(long) * 8 - 2 : div; + bl = max(rdp->blimit, pending >> div); + if (unlikely(bl > 100)) { + long rrn = READ_ONCE(rcu_resched_ns); + + rrn = rrn < NSEC_PER_MSEC ? NSEC_PER_MSEC : rrn > NSEC_PER_SEC ? NSEC_PER_SEC : rrn; + tlimit = local_clock() + rrn; + } trace_rcu_batch_start(rcu_state.name, rcu_segcblist_n_cbs(&rdp->cblist), bl); rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl); @@ -2545,8 +2553,7 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) raw_spin_lock_irqsave_rcu_node(rnp, flags); rcu_state.cbovldnext |= !!rnp->cbovldmask; if (rnp->qsmask == 0) { - if (!IS_ENABLED(CONFIG_PREEMPT_RCU) || - rcu_preempt_blocked_readers_cgp(rnp)) { + if (rcu_preempt_blocked_readers_cgp(rnp)) { /* * No point in scanning bits because they * are all zero. But we might need to @@ -3443,7 +3450,7 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) unsigned long count = 0; /* Snapshot count of all CPUs */ - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); count += READ_ONCE(krcp->count); @@ -3458,7 +3465,7 @@ kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) int cpu, freed = 0; unsigned long flags; - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { int count; struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); @@ -3491,7 +3498,7 @@ void __init kfree_rcu_scheduler_running(void) int cpu; unsigned long flags; - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu); raw_spin_lock_irqsave(&krcp->lock, flags); @@ -3973,8 +3980,6 @@ int rcutree_offline_cpu(unsigned int cpu) return 0; } -static DEFINE_PER_CPU(int, rcu_cpu_started); - /* * Mark the specified CPU as being online so that subsequent grace periods * (both expedited and normal) will wait on it. Note that this means that @@ -3994,12 +3999,11 @@ void rcu_cpu_starting(unsigned int cpu) struct rcu_node *rnp; bool newcpu; - if (per_cpu(rcu_cpu_started, cpu)) + rdp = per_cpu_ptr(&rcu_data, cpu); + if (rdp->cpu_started) return; + rdp->cpu_started = true; - per_cpu(rcu_cpu_started, cpu) = 1; - - rdp = per_cpu_ptr(&rcu_data, cpu); rnp = rdp->mynode; mask = rdp->grpmask; raw_spin_lock_irqsave_rcu_node(rnp, flags); @@ -4059,7 +4063,7 @@ void rcu_report_dead(unsigned int cpu) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); raw_spin_unlock(&rcu_state.ofl_lock); - per_cpu(rcu_cpu_started, cpu) = 0; + rdp->cpu_started = false; } /* diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index c96ae351688b..309bc7f41d35 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -156,6 +156,7 @@ struct rcu_data { bool beenonline; /* CPU online at least once. */ bool gpwrap; /* Possible ->gp_seq wrap. */ bool exp_deferred_qs; /* This CPU awaiting a deferred QS? */ + bool cpu_started; /* RCU watching this onlining CPU. */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ unsigned long grpmask; /* Mask to apply to leaf qsmask. */ unsigned long ticks_this_gp; /* The number of scheduling-clock */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 1888c0eb1216..8760b6ead770 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -732,11 +732,9 @@ static void rcu_exp_need_qs(void) /* Invoked on each online non-idle CPU for expedited quiescent state. */ static void rcu_exp_handler(void *unused) { - struct rcu_data *rdp; - struct rcu_node *rnp; + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + struct rcu_node *rnp = rdp->mynode; - rdp = this_cpu_ptr(&rcu_data); - rnp = rdp->mynode; if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) || __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) return; diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 982fc5be5269..cb1e8c8befb9 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1926,6 +1926,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) * nearest grace period (if any) to wait for next. The CB kthreads * and the global grace-period kthread are awakened if needed. */ + WARN_ON_ONCE(my_rdp->nocb_gp_rdp != my_rdp); for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_cb_rdp) { trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("Check")); rcu_nocb_lock_irqsave(rdp, flags); @@ -2411,13 +2412,12 @@ static void show_rcu_nocb_state(struct rcu_data *rdp) return; waslocked = raw_spin_is_locked(&rdp->nocb_gp_lock); - wastimer = timer_pending(&rdp->nocb_timer); + wastimer = timer_pending(&rdp->nocb_bypass_timer); wassleep = swait_active(&rdp->nocb_gp_wq); - if (!rdp->nocb_defer_wakeup && !rdp->nocb_gp_sleep && - !waslocked && !wastimer && !wassleep) + if (!rdp->nocb_gp_sleep && !waslocked && !wastimer && !wassleep) return; /* Nothing untowards. */ - pr_info(" !!! %c%c%c%c %c\n", + pr_info(" nocb GP activity on CB-only CPU!!! %c%c%c%c %c\n", "lL"[waslocked], "dD"[!!rdp->nocb_defer_wakeup], "tT"[wastimer], diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index b5d3b4794db4..0fde39b8daab 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -158,7 +158,7 @@ static void rcu_stall_kick_kthreads(void) { unsigned long j; - if (!rcu_kick_kthreads) + if (!READ_ONCE(rcu_kick_kthreads)) return; j = READ_ONCE(rcu_state.jiffies_kick_kthreads); if (time_after(jiffies, j) && rcu_state.gp_kthread && @@ -580,7 +580,7 @@ static void check_cpu_stall(struct rcu_data *rdp) unsigned long js; struct rcu_node *rnp; - if ((rcu_stall_is_suppressed() && !rcu_kick_kthreads) || + if ((rcu_stall_is_suppressed() && !READ_ONCE(rcu_kick_kthreads)) || !rcu_gp_in_progress()) return; rcu_stall_kick_kthreads(); @@ -623,7 +623,7 @@ static void check_cpu_stall(struct rcu_data *rdp) /* We haven't checked in, so go dump stack. */ print_cpu_stall(gps); - if (rcu_cpu_stall_ftrace_dump) + if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) rcu_ftrace_dump(DUMP_ALL); } else if (rcu_gp_in_progress() && @@ -632,7 +632,7 @@ static void check_cpu_stall(struct rcu_data *rdp) /* They had a few time units to dump stack, so complain. */ print_other_cpu_stall(gs2, gps); - if (rcu_cpu_stall_ftrace_dump) + if (READ_ONCE(rcu_cpu_stall_ftrace_dump)) rcu_ftrace_dump(DUMP_ALL); } } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 2de49b5d8dd2..5f7713a27dbb 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -53,19 +53,6 @@ #endif #define MODULE_PARAM_PREFIX "rcupdate." -#ifndef data_race -#define data_race(expr) \ - ({ \ - expr; \ - }) -#endif -#ifndef ASSERT_EXCLUSIVE_WRITER -#define ASSERT_EXCLUSIVE_WRITER(var) do { } while (0) -#endif -#ifndef ASSERT_EXCLUSIVE_ACCESS -#define ASSERT_EXCLUSIVE_ACCESS(var) do { } while (0) -#endif - #ifndef CONFIG_TINY_RCU module_param(rcu_expedited, int, 0); module_param(rcu_normal, int, 0); |