Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--	kernel/rcu/tree.c	215
1 file changed, 86 insertions, 129 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 51d4c3acf32d..1250e4bd4b85 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -97,9 +97,6 @@ struct rcu_state sname##_state = { \
 	.gp_state = RCU_GP_IDLE, \
 	.gpnum = 0UL - 300UL, \
 	.completed = 0UL - 300UL, \
-	.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
-	.orphan_pend = RCU_CBLIST_INITIALIZER(sname##_state.orphan_pend), \
-	.orphan_done = RCU_CBLIST_INITIALIZER(sname##_state.orphan_done), \
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.name = RCU_STATE_NAME(sname), \
 	.abbr = sabbr, \
@@ -843,13 +840,9 @@ static void rcu_eqs_enter(bool user)
  */
 void rcu_idle_enter(void)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_idle_enter() invoked with irqs enabled!!!");
 	rcu_eqs_enter(false);
-	local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
 #ifdef CONFIG_NO_HZ_FULL
 /**
@@ -862,7 +855,8 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
  */
 void rcu_user_enter(void)
 {
-	rcu_eqs_enter(1);
+	RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_user_enter() invoked with irqs enabled!!!");
+	rcu_eqs_enter(true);
 }
 #endif /* CONFIG_NO_HZ_FULL */
 
@@ -955,8 +949,10 @@ static void rcu_eqs_exit(bool user)
 	if (oldval & DYNTICK_TASK_NEST_MASK) {
 		rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
 	} else {
+		__this_cpu_inc(disable_rcu_irq_enter);
 		rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 		rcu_eqs_exit_common(oldval, user);
+		__this_cpu_dec(disable_rcu_irq_enter);
 	}
 }
 
@@ -979,7 +975,6 @@ void rcu_idle_exit(void)
 	rcu_eqs_exit(false);
 	local_irq_restore(flags);
 }
-EXPORT_SYMBOL_GPL(rcu_idle_exit);
 
 #ifdef CONFIG_NO_HZ_FULL
 /**
@@ -1358,12 +1353,13 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
 	j = jiffies;
 	gpa = READ_ONCE(rsp->gp_activity);
 	if (j - gpa > 2 * HZ) {
-		pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx\n",
+		pr_err("%s kthread starved for %ld jiffies! g%lu c%lu f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
 		       rsp->name, j - gpa,
 		       rsp->gpnum, rsp->completed,
 		       rsp->gp_flags,
 		       gp_state_getname(rsp->gp_state), rsp->gp_state,
-		       rsp->gp_kthread ? rsp->gp_kthread->state : ~0);
+		       rsp->gp_kthread ? rsp->gp_kthread->state : ~0,
+		       rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1);
 		if (rsp->gp_kthread) {
 			sched_show_task(rsp->gp_kthread);
 			wake_up_process(rsp->gp_kthread);
@@ -2067,8 +2063,8 @@ static bool rcu_gp_init(struct rcu_state *rsp)
 }
 
 /*
- * Helper function for wait_event_interruptible_timeout() wakeup
- * at force-quiescent-state time.
+ * Helper function for swait_event_idle() wakeup at force-quiescent-state
+ * time.
  */
 static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
 {
@@ -2206,9 +2202,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
 					       READ_ONCE(rsp->gpnum),
 					       TPS("reqwait"));
 			rsp->gp_state = RCU_GP_WAIT_GPS;
-			swait_event_interruptible(rsp->gp_wq,
-						 READ_ONCE(rsp->gp_flags) &
-						 RCU_GP_FLAG_INIT);
+			swait_event_idle(rsp->gp_wq, READ_ONCE(rsp->gp_flags) &
+						     RCU_GP_FLAG_INIT);
 			rsp->gp_state = RCU_GP_DONE_GPS;
 			/* Locking provides needed memory barrier. */
 			if (rcu_gp_init(rsp))
@@ -2239,7 +2234,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 					       READ_ONCE(rsp->gpnum),
 					       TPS("fqswait"));
 			rsp->gp_state = RCU_GP_WAIT_FQS;
-			ret = swait_event_interruptible_timeout(rsp->gp_wq,
+			ret = swait_event_idle_timeout(rsp->gp_wq,
 					rcu_gp_fqs_check_wake(rsp, &gf), j);
 			rsp->gp_state = RCU_GP_DOING_FQS;
 			/* Locking provides needed memory barriers. */
@@ -2409,6 +2404,8 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
 			return;
 		}
 		WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
+		WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 &&
+			     rcu_preempt_blocked_readers_cgp(rnp));
 		rnp->qsmask &= ~mask;
 		trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
 						 mask, rnp->qsmask, rnp->level,
@@ -2563,85 +2560,6 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 }
 
 /*
- * Send the specified CPU's RCU callbacks to the orphanage.  The
- * specified CPU must be offline, and the caller must hold the
- * ->orphan_lock.
- */
-static void
-rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
-			  struct rcu_node *rnp, struct rcu_data *rdp)
-{
-	lockdep_assert_held(&rsp->orphan_lock);
-
-	/* No-CBs CPUs do not have orphanable callbacks. */
-	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu))
-		return;
-
-	/*
-	 * Orphan the callbacks.  First adjust the counts.  This is safe
-	 * because _rcu_barrier() excludes CPU-hotplug operations, so it
-	 * cannot be running now.  Thus no memory barrier is required.
-	 */
-	rdp->n_cbs_orphaned += rcu_segcblist_n_cbs(&rdp->cblist);
-	rcu_segcblist_extract_count(&rdp->cblist, &rsp->orphan_done);
-
-	/*
-	 * Next, move those callbacks still needing a grace period to
-	 * the orphanage, where some other CPU will pick them up.
-	 * Some of the callbacks might have gone partway through a grace
-	 * period, but that is too bad.  They get to start over because we
-	 * cannot assume that grace periods are synchronized across CPUs.
-	 */
-	rcu_segcblist_extract_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
-
-	/*
-	 * Then move the ready-to-invoke callbacks to the orphanage,
-	 * where some other CPU will pick them up.  These will not be
-	 * required to pass though another grace period: They are done.
-	 */
-	rcu_segcblist_extract_done_cbs(&rdp->cblist, &rsp->orphan_done);
-
-	/* Finally, disallow further callbacks on this CPU.  */
-	rcu_segcblist_disable(&rdp->cblist);
-}
-
-/*
- * Adopt the RCU callbacks from the specified rcu_state structure's
- * orphanage.  The caller must hold the ->orphan_lock.
- */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
-{
-	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
-
-	lockdep_assert_held(&rsp->orphan_lock);
-
-	/* No-CBs CPUs are handled specially. */
-	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
-	    rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
-		return;
-
-	/* Do the accounting first. */
-	rdp->n_cbs_adopted += rsp->orphan_done.len;
-	if (rsp->orphan_done.len_lazy != rsp->orphan_done.len)
-		rcu_idle_count_callbacks_posted();
-	rcu_segcblist_insert_count(&rdp->cblist, &rsp->orphan_done);
-
-	/*
-	 * We do not need a memory barrier here because the only way we
-	 * can get here if there is an rcu_barrier() in flight is if
-	 * we are the task doing the rcu_barrier().
-	 */
-
-	/* First adopt the ready-to-invoke callbacks, then the done ones. */
-	rcu_segcblist_insert_done_cbs(&rdp->cblist, &rsp->orphan_done);
-	WARN_ON_ONCE(rsp->orphan_done.head);
-	rcu_segcblist_insert_pend_cbs(&rdp->cblist, &rsp->orphan_pend);
-	WARN_ON_ONCE(rsp->orphan_pend.head);
-	WARN_ON_ONCE(rcu_segcblist_empty(&rdp->cblist) !=
-		     !rcu_segcblist_n_cbs(&rdp->cblist));
-}
-
-/*
  * Trace the fact that this CPU is going offline.
  */
 static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
@@ -2704,14 +2622,12 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
 
 /*
  * The CPU has been completely removed, and some other CPU is reporting
- * this fact from process context.  Do the remainder of the cleanup,
- * including orphaning the outgoing CPU's RCU callbacks, and also
- * adopting them.  There can only be one CPU hotplug operation at a time,
- * so no other CPU can be attempting to update rcu_cpu_kthread_task.
+ * this fact from process context.  Do the remainder of the cleanup.
+ * There can only be one CPU hotplug operation at a time, so no need for
+ * explicit locking.
  */
 static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
-	unsigned long flags;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
@@ -2720,18 +2636,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
 	/* Adjust any no-longer-needed kthreads. */
 	rcu_boost_kthread_setaffinity(rnp, -1);
-
-	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
-	raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
-	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
-	rcu_adopt_orphan_cbs(rsp, flags);
-	raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags);
-
-	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
-		  !rcu_segcblist_empty(&rdp->cblist),
-		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
-		  cpu, rcu_segcblist_n_cbs(&rdp->cblist),
-		  rcu_segcblist_first_cb(&rdp->cblist));
 }
 
 /*
@@ -3569,10 +3473,11 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
 	struct rcu_state *rsp = rdp->rsp;
 
 	if (atomic_dec_and_test(&rsp->barrier_cpu_count)) {
-		_rcu_barrier_trace(rsp, "LastCB", -1, rsp->barrier_sequence);
+		_rcu_barrier_trace(rsp, TPS("LastCB"), -1,
+				   rsp->barrier_sequence);
 		complete(&rsp->barrier_completion);
 	} else {
-		_rcu_barrier_trace(rsp, "CB", -1, rsp->barrier_sequence);
+		_rcu_barrier_trace(rsp, TPS("CB"), -1, rsp->barrier_sequence);
 	}
 }
 
@@ -3584,14 +3489,15 @@ static void rcu_barrier_func(void *type)
 	struct rcu_state *rsp = type;
 	struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
 
-	_rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence);
+	_rcu_barrier_trace(rsp, TPS("IRQ"), -1, rsp->barrier_sequence);
 	rdp->barrier_head.func = rcu_barrier_callback;
 	debug_rcu_head_queue(&rdp->barrier_head);
 	if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
 		atomic_inc(&rsp->barrier_cpu_count);
 	} else {
 		debug_rcu_head_unqueue(&rdp->barrier_head);
-		_rcu_barrier_trace(rsp, "IRQNQ", -1, rsp->barrier_sequence);
+		_rcu_barrier_trace(rsp, TPS("IRQNQ"), -1,
+				   rsp->barrier_sequence);
 	}
 }
 
@@ -3605,14 +3511,15 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	struct rcu_data *rdp;
 	unsigned long s = rcu_seq_snap(&rsp->barrier_sequence);
 
-	_rcu_barrier_trace(rsp, "Begin", -1, s);
+	_rcu_barrier_trace(rsp, TPS("Begin"), -1, s);
 
 	/* Take mutex to serialize concurrent rcu_barrier() requests. */
 	mutex_lock(&rsp->barrier_mutex);
 
 	/* Did someone else do our work for us? */
 	if (rcu_seq_done(&rsp->barrier_sequence, s)) {
-		_rcu_barrier_trace(rsp, "EarlyExit", -1, rsp->barrier_sequence);
+		_rcu_barrier_trace(rsp, TPS("EarlyExit"), -1,
+				   rsp->barrier_sequence);
 		smp_mb(); /* caller's subsequent code after above check. */
 		mutex_unlock(&rsp->barrier_mutex);
 		return;
@@ -3620,7 +3527,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 
 	/* Mark the start of the barrier operation. */
 	rcu_seq_start(&rsp->barrier_sequence);
-	_rcu_barrier_trace(rsp, "Inc1", -1, rsp->barrier_sequence);
+	_rcu_barrier_trace(rsp, TPS("Inc1"), -1, rsp->barrier_sequence);
 
 	/*
 	 * Initialize the count to one rather than to zero in order to
@@ -3643,10 +3550,10 @@ static void _rcu_barrier(struct rcu_state *rsp)
 		rdp = per_cpu_ptr(rsp->rda, cpu);
 		if (rcu_is_nocb_cpu(cpu)) {
 			if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
-				_rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
+				_rcu_barrier_trace(rsp, TPS("OfflineNoCB"), cpu,
 						   rsp->barrier_sequence);
 			} else {
-				_rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
+				_rcu_barrier_trace(rsp, TPS("OnlineNoCB"), cpu,
 						   rsp->barrier_sequence);
 				smp_mb__before_atomic();
 				atomic_inc(&rsp->barrier_cpu_count);
@@ -3654,11 +3561,11 @@ static void _rcu_barrier(struct rcu_state *rsp)
 					   rcu_barrier_callback, rsp, cpu, 0);
 			}
 		} else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
-			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
+			_rcu_barrier_trace(rsp, TPS("OnlineQ"), cpu,
 					   rsp->barrier_sequence);
 			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
 		} else {
-			_rcu_barrier_trace(rsp, "OnlineNQ", cpu,
+			_rcu_barrier_trace(rsp, TPS("OnlineNQ"), cpu,
 					   rsp->barrier_sequence);
 		}
 	}
@@ -3675,7 +3582,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	wait_for_completion(&rsp->barrier_completion);
 
 	/* Mark the end of the barrier operation. */
-	_rcu_barrier_trace(rsp, "Inc2", -1, rsp->barrier_sequence);
+	_rcu_barrier_trace(rsp, TPS("Inc2"), -1, rsp->barrier_sequence);
 	rcu_seq_end(&rsp->barrier_sequence);
 
 	/* Other rcu_barrier() invocations can now safely proceed. */
@@ -3777,8 +3684,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	 */
 	rnp = rdp->mynode;
 	raw_spin_lock_rcu_node(rnp);		/* irqs already disabled. */
-	if (!rdp->beenonline)
-		WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
 	rdp->beenonline = true;	 /* We have now been online. */
 	rdp->gpnum = rnp->completed; /* Make CPU later note any new GP. */
 	rdp->completed = rnp->completed;
@@ -3882,6 +3787,8 @@ void rcu_cpu_starting(unsigned int cpu)
 {
 	unsigned long flags;
 	unsigned long mask;
+	int nbits;
+	unsigned long oldmask;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 	struct rcu_state *rsp;
@@ -3892,9 +3799,15 @@ void rcu_cpu_starting(unsigned int cpu)
 		mask = rdp->grpmask;
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		rnp->qsmaskinitnext |= mask;
+		oldmask = rnp->expmaskinitnext;
 		rnp->expmaskinitnext |= mask;
+		oldmask ^= rnp->expmaskinitnext;
+		nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
+		/* Allow lockless access for expedited grace periods. */
+		smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
+	smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -3937,6 +3850,50 @@ void rcu_report_dead(unsigned int cpu)
 	for_each_rcu_flavor(rsp)
 		rcu_cleanup_dying_idle_cpu(cpu, rsp);
 }
+
+/* Migrate the dead CPU's callbacks to the current CPU. */
+static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
+{
+	unsigned long flags;
+	struct rcu_data *my_rdp;
+	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+	struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+
+	if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
+		return;  /* No callbacks to migrate. */
+
+	local_irq_save(flags);
+	my_rdp = this_cpu_ptr(rsp->rda);
+	if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) {
+		local_irq_restore(flags);
+		return;
+	}
+	raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
+	rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */
+	rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */
+	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
+	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
+		     !rcu_segcblist_n_cbs(&my_rdp->cblist));
+	raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
+	WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
+		  !rcu_segcblist_empty(&rdp->cblist),
+		  "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
+		  cpu, rcu_segcblist_n_cbs(&rdp->cblist),
+		  rcu_segcblist_first_cb(&rdp->cblist));
+}
+
+/*
+ * The outgoing CPU has just passed through the dying-idle state,
+ * and we are being invoked from the CPU that was IPIed to continue the
+ * offline operation.  We need to migrate the outgoing CPU's callbacks.
+ */
+void rcutree_migrate_callbacks(int cpu)
+{
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp)
+		rcu_migrate_callbacks(cpu, rsp);
+}
 #endif
 
 /*
@@ -4134,7 +4091,7 @@ static void __init rcu_init_geometry(void)
 	if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
 	    nr_cpu_ids == NR_CPUS)
 		return;
-	pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
+	pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%u\n",
 		rcu_fanout_leaf, nr_cpu_ids);
 
 	/*
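The headline change in this diff is the removal of the RCU callback orphanage (->orphan_lock, ->orphan_pend, ->orphan_done) in favor of rcutree_migrate_callbacks(), which splices a dead CPU's callback list directly onto the surviving CPU's list. The sketch below is a minimal user-space analogy of that splice step only, not kernel code: the cb, cblist, cblist_enqueue, and cblist_merge names are illustrative stand-ins rather than the kernel's rcu_segcblist API, and the locking, grace-period assignment, and no-CBs handling seen in the diff are deliberately omitted.

/*
 * Toy user-space analogy of the callback-migration path added above.
 * Each "CPU" owns a singly linked list of callbacks; when a CPU goes
 * offline, its list is spliced onto a surviving CPU's list instead of
 * being staged on a shared orphanage.  Names are illustrative only.
 */
#include <stdio.h>

struct cb {
	void (*func)(struct cb *);
	struct cb *next;
};

struct cblist {
	struct cb *head;
	struct cb **tail;	/* points at the terminating NULL link */
	long len;
};

static void cblist_init(struct cblist *cl)
{
	cl->head = NULL;
	cl->tail = &cl->head;
	cl->len = 0;
}

static void cblist_enqueue(struct cblist *cl, struct cb *cb)
{
	cb->next = NULL;
	*cl->tail = cb;
	cl->tail = &cb->next;
	cl->len++;
}

/* Splice all of @src onto the end of @dst, leaving @src empty. */
static void cblist_merge(struct cblist *dst, struct cblist *src)
{
	if (!src->head)
		return;
	*dst->tail = src->head;
	dst->tail = src->tail;
	dst->len += src->len;
	cblist_init(src);
}

static void show(struct cb *cb)
{
	printf("invoking callback %p\n", (void *)cb);
}

int main(void)
{
	struct cblist cpu0, cpu1;	/* cpu1 is "going offline" */
	struct cb cbs[4];
	int i;

	cblist_init(&cpu0);
	cblist_init(&cpu1);
	for (i = 0; i < 4; i++) {
		cbs[i].func = show;
		cblist_enqueue(i < 2 ? &cpu0 : &cpu1, &cbs[i]);
	}

	/* "cpu1" goes offline: migrate its callbacks to "cpu0". */
	cblist_merge(&cpu0, &cpu1);

	for (struct cb *cb = cpu0.head; cb; cb = cb->next)
		cb->func(cb);
	printf("cpu0 now holds %ld callbacks; cpu1 holds %ld\n",
	       cpu0.len, cpu1.len);
	return 0;
}

Because the kernel version splices directly from the outgoing CPU's rcu_data into the current CPU's under the root rcu_node lock, there is no longer a shared staging area, which is why the diff can drop the global ->orphan_lock initializers and the orphan/adopt pair in rcu_cleanup_dead_cpu() entirely.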