Diffstat (limited to 'kernel/rcu/tree.c')
 -rw-r--r--  kernel/rcu/tree.c  152
 1 file changed, 96 insertions, 56 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 93416afebd59..d04f2192f02c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -301,12 +301,6 @@ static bool rcu_dynticks_in_eqs(int snap)
 	return !(snap & RCU_DYNTICKS_IDX);
 }
 
-/* Return true if the specified CPU is currently idle from an RCU viewpoint.  */
-bool rcu_is_idle_cpu(int cpu)
-{
-	return rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu));
-}
-
 /*
  * Return true if the CPU corresponding to the specified rcu_data
  * structure has spent some time in an extended quiescent state since
@@ -2108,7 +2102,7 @@ int rcutree_dying_cpu(unsigned int cpu)
 	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
 		return 0;
 
-	blkd = !!(rnp->qsmask & rdp->grpmask);
+	blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
 	trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
 			       blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
 	return 0;
@@ -2418,7 +2412,7 @@ void rcu_force_quiescent_state(void)
 	struct rcu_node *rnp_old = NULL;
 
 	/* Funnel through hierarchy to reduce memory contention. */
-	rnp = __this_cpu_read(rcu_data.mynode);
+	rnp = raw_cpu_read(rcu_data.mynode);
 	for (; rnp != NULL; rnp = rnp->parent) {
 		ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
 		       !raw_spin_trylock(&rnp->fqslock);
@@ -2730,47 +2724,8 @@ static void check_cb_ovld(struct rcu_data *rdp)
 	raw_spin_unlock_rcu_node(rnp);
 }
 
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed.  However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked.
- *
- * RCU read-side critical sections are delimited by rcu_read_lock()
- * and rcu_read_unlock(), and may be nested.  In addition, but only in
- * v5.0 and later, regions of code across which interrupts, preemption,
- * or softirqs have been disabled also serve as RCU read-side critical
- * sections.  This includes hardware interrupt handlers, softirq handlers,
- * and NMI handlers.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section.  On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu().  It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section.  Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- *
- * Implementation of these memory-ordering guarantees is described here:
- * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
- */
-void call_rcu(struct rcu_head *head, rcu_callback_t func)
+static void
+__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy)
 {
 	static atomic_t doublefrees;
 	unsigned long flags;
@@ -2811,7 +2766,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
 	}
 
 	check_cb_ovld(rdp);
-	if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
+	if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy))
 		return; // Enqueued onto ->nocb_bypass, so just leave.
 	// If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
 	rcu_segcblist_enqueue(&rdp->cblist, head);
@@ -2833,8 +2788,84 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
 		local_irq_restore(flags);
 	}
 }
-EXPORT_SYMBOL_GPL(call_rcu);
 
+#ifdef CONFIG_RCU_LAZY
+/**
+ * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
+ * flush all lazy callbacks (including the new one) to the main ->cblist while
+ * doing so.
+ *
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.
+ *
+ * Use this API instead of call_rcu() if you don't want the callback to be
+ * invoked after very long periods of time, which can happen on systems without
+ * memory pressure and on systems which are lightly loaded or mostly idle.
+ * This function will cause callbacks to be invoked sooner than later at the
+ * expense of extra power. Other than that, this function is identical to, and
+ * reuses call_rcu()'s logic. Refer to call_rcu() for more details about memory
+ * ordering and other functionality.
+ */
+void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
+{
+	return __call_rcu_common(head, func, false);
+}
+EXPORT_SYMBOL_GPL(call_rcu_hurry);
+#endif
+
+/**
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ * By default the callbacks are 'lazy' and are kept hidden from the main
+ * ->cblist to prevent starting of grace periods too soon.
+ * If you desire grace periods to start very soon, use call_rcu_hurry().
+ *
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.  However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked.
+ *
+ * RCU read-side critical sections are delimited by rcu_read_lock()
+ * and rcu_read_unlock(), and may be nested.  In addition, but only in
+ * v5.0 and later, regions of code across which interrupts, preemption,
+ * or softirqs have been disabled also serve as RCU read-side critical
+ * sections.  This includes hardware interrupt handlers, softirq handlers,
+ * and NMI handlers.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section.  On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu().  It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section.  Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
+ *
+ * Implementation of these memory-ordering guarantees is described here:
+ * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
+ */
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
+{
+	return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
+}
+EXPORT_SYMBOL_GPL(call_rcu);
 
 /* Maximum number of jiffies to wait before draining a batch. */
 #define KFREE_DRAIN_JIFFIES (5 * HZ)
@@ -3509,7 +3540,7 @@ void synchronize_rcu(void)
 		if (rcu_gp_is_expedited())
 			synchronize_rcu_expedited();
 		else
-			wait_rcu_gp(call_rcu);
+			wait_rcu_gp(call_rcu_hurry);
 		return;
 	}
 
@@ -3896,6 +3927,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
 {
 	unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
 	unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
+	bool wake_nocb = false;
+	bool was_alldone = false;
 
 	lockdep_assert_held(&rcu_state.barrier_lock);
 	if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
@@ -3904,7 +3937,14 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
 	rdp->barrier_head.func = rcu_barrier_callback;
 	debug_rcu_head_queue(&rdp->barrier_head);
 	rcu_nocb_lock(rdp);
-	WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
+	/*
+	 * Flush bypass and wakeup rcuog if we add callbacks to an empty regular
+	 * queue. This way we don't wait for bypass timer that can reach seconds
+	 * if it's fully lazy.
+	 */
+	was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist);
+	WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
+	wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist);
 	if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
 		atomic_inc(&rcu_state.barrier_cpu_count);
 	} else {
@@ -3912,6 +3952,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
 		rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
 	}
 	rcu_nocb_unlock(rdp);
+	if (wake_nocb)
+		wake_nocb_gp(rdp, false);
 	smp_store_release(&rdp->barrier_seq_snap, gseq);
 }
 
@@ -4278,8 +4320,6 @@ void rcu_report_dead(unsigned int cpu)
 	// Do any dangling deferred wakeups.
 	do_nocb_deferred_wakeup(rdp);
 
-	/* QS for any half-done expedited grace period. */
-	rcu_report_exp_rdp(rdp);
 	rcu_preempt_deferred_qs(current);
 
 	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
@@ -4327,7 +4367,7 @@ void rcutree_migrate_callbacks(int cpu)
 	my_rdp = this_cpu_ptr(&rcu_data);
 	my_rnp = my_rdp->mynode;
 	rcu_nocb_lock(my_rdp); /* irqs already disabled. */
-	WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies));
+	WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false));
 	raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */
 	/* Leverage recent GPs and set GP for new callbacks. */
 	needwake = rcu_advance_cbs(my_rnp, rdp) ||
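
For context, the caller-visible choice this diff introduces can be sketched as follows. This snippet is not part of the patch: struct foo, foo_release(), foo_free() and foo_free_hurry() are hypothetical names used only for illustration, and in this version of the code call_rcu_hurry() is only built when CONFIG_RCU_LAZY is enabled.

#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical RCU-protected object. */
struct foo {
	int data;
	struct rcu_head rh;
};

/* Runs some time after a full grace period has elapsed. */
static void foo_release(struct rcu_head *rh)
{
	kfree(container_of(rh, struct foo, rh));
}

/*
 * Default path: with CONFIG_RCU_LAZY, __call_rcu_common(..., true) lets the
 * callback sit in the per-CPU ->nocb_bypass list for a while, so idle CPUs
 * can stay idle longer.
 */
static void foo_free(struct foo *fp)
{
	call_rcu(&fp->rh, foo_release);
}

/*
 * Latency-sensitive path: queue non-lazily so the grace-period machinery is
 * kicked promptly, at some cost in power.
 */
static void foo_free_hurry(struct foo *fp)
{
	call_rcu_hurry(&fp->rh, foo_release);
}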
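
Similarly, the synchronize_rcu() and rcu_barrier_entrain() hunks keep waiters from being held up by the lazy bypass timer. A hedged sketch of the kind of teardown path this protects; foo_cache and foo_table_destroy() are illustrative names only, assuming objects from foo_cache are freed via call_rcu() as in the previous sketch.

#include <linux/rcupdate.h>
#include <linux/slab.h>

static struct kmem_cache *foo_cache;	/* hypothetical cache for struct foo */

static void foo_table_destroy(void)
{
	/*
	 * Wait for all previously queued RCU callbacks, lazy or not, to run.
	 * The entrain change above flushes the ->nocb_bypass list and wakes
	 * the rcuog kthread when the regular ->cblist was empty, so this
	 * wait is not stretched out to the lazy flush interval.
	 */
	rcu_barrier();

	/* No callback can still reference foo_cache objects at this point. */
	kmem_cache_destroy(foo_cache);
}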