Diffstat (limited to 'kernel/rcu/tree.c')
-rw-r--r--	kernel/rcu/tree.c	97
1 file changed, 82 insertions, 15 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index dd081987a8ec..b3d116cd072d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -369,6 +369,9 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 				bool user)
 {
+	struct rcu_state *rsp;
+	struct rcu_data *rdp;
+
 	trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
 	if (!user && !is_idle_task(current)) {
 		struct task_struct *idle __maybe_unused =
@@ -380,6 +383,10 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
 	}
+	for_each_rcu_flavor(rsp) {
+		rdp = this_cpu_ptr(rsp->rda);
+		do_nocb_deferred_wakeup(rdp);
+	}
 	rcu_prepare_for_idle(smp_processor_id());
 	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
 	smp_mb__before_atomic_inc();  /* See above. */
@@ -411,11 +418,12 @@ static void rcu_eqs_enter(bool user)
 	rdtp = this_cpu_ptr(&rcu_dynticks);
 	oldval = rdtp->dynticks_nesting;
 	WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
-	if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+	if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) {
 		rdtp->dynticks_nesting = 0;
-	else
+		rcu_eqs_enter_common(rdtp, oldval, user);
+	} else {
 		rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
-	rcu_eqs_enter_common(rdtp, oldval, user);
+	}
 }
 
 /**
@@ -533,11 +541,12 @@ static void rcu_eqs_exit(bool user)
 	rdtp = this_cpu_ptr(&rcu_dynticks);
 	oldval = rdtp->dynticks_nesting;
 	WARN_ON_ONCE(oldval < 0);
-	if (oldval & DYNTICK_TASK_NEST_MASK)
+	if (oldval & DYNTICK_TASK_NEST_MASK) {
 		rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
-	else
+	} else {
 		rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
-	rcu_eqs_exit_common(rdtp, oldval, user);
+		rcu_eqs_exit_common(rdtp, oldval, user);
+	}
 }
 
 /**
@@ -716,7 +725,7 @@ bool rcu_lockdep_current_cpu_online(void)
 	bool ret;
 
 	if (in_nmi())
-		return 1;
+		return true;
 	preempt_disable();
 	rdp = this_cpu_ptr(&rcu_sched_data);
 	rnp = rdp->mynode;
@@ -755,6 +764,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
 }
 
 /*
+ * This function really isn't for public consumption, but RCU is special in
+ * that context switches can allow the state machine to make progress.
+ */
+extern void resched_cpu(int cpu);
+
+/*
  * Return true if the specified CPU has passed through a quiescent
  * state by virtue of being in or having passed through an dynticks
  * idle state since the last call to dyntick_save_progress_counter()
@@ -812,16 +827,34 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
 	 */
 	rcu_kick_nohz_cpu(rdp->cpu);
 
+	/*
+	 * Alternatively, the CPU might be running in the kernel
+	 * for an extended period of time without a quiescent state.
+	 * Attempt to force the CPU through the scheduler to gain the
+	 * needed quiescent state, but only if the grace period has gone
+	 * on for an uncommonly long time.  If there are many stuck CPUs,
+	 * we will beat on the first one until it gets unstuck, then move
+	 * to the next.  Only do this for the primary flavor of RCU.
+	 */
+	if (rdp->rsp == rcu_state &&
+	    ULONG_CMP_GE(ACCESS_ONCE(jiffies), rdp->rsp->jiffies_resched)) {
+		rdp->rsp->jiffies_resched += 5;
+		resched_cpu(rdp->cpu);
+	}
+
 	return 0;
 }
 
 static void record_gp_stall_check_time(struct rcu_state *rsp)
 {
 	unsigned long j = ACCESS_ONCE(jiffies);
+	unsigned long j1;
 
 	rsp->gp_start = j;
 	smp_wmb(); /* Record start time before stall time. */
-	rsp->jiffies_stall = j + rcu_jiffies_till_stall_check();
+	j1 = rcu_jiffies_till_stall_check();
+	rsp->jiffies_stall = j + j1;
+	rsp->jiffies_resched = j + j1 / 2;
 }
 
 /*
@@ -1133,8 +1166,10 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
 	 * hold it, acquire the root rcu_node structure's lock in order to
 	 * start one (if needed).
 	 */
-	if (rnp != rnp_root)
+	if (rnp != rnp_root) {
 		raw_spin_lock(&rnp_root->lock);
+		smp_mb__after_unlock_lock();
+	}
 
 	/*
 	 * Get a new grace-period number.  If there really is no grace
@@ -1354,6 +1389,7 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
 		local_irq_restore(flags);
 		return;
 	}
+	smp_mb__after_unlock_lock();
 	__note_gp_changes(rsp, rnp, rdp);
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
@@ -1368,6 +1404,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 
 	rcu_bind_gp_kthread();
 	raw_spin_lock_irq(&rnp->lock);
+	smp_mb__after_unlock_lock();
 	if (rsp->gp_flags == 0) {
 		/* Spurious wakeup, tell caller to go back to sleep.  */
 		raw_spin_unlock_irq(&rnp->lock);
@@ -1409,6 +1446,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
 	 */
 	rcu_for_each_node_breadth_first(rsp, rnp) {
 		raw_spin_lock_irq(&rnp->lock);
+		smp_mb__after_unlock_lock();
 		rdp = this_cpu_ptr(rsp->rda);
 		rcu_preempt_check_blocked_tasks(rnp);
 		rnp->qsmask = rnp->qsmaskinit;
@@ -1463,6 +1501,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 	/* Clear flag to prevent immediate re-entry. */
 	if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
 		raw_spin_lock_irq(&rnp->lock);
+		smp_mb__after_unlock_lock();
 		rsp->gp_flags &= ~RCU_GP_FLAG_FQS;
 		raw_spin_unlock_irq(&rnp->lock);
 	}
@@ -1480,6 +1519,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
 	raw_spin_lock_irq(&rnp->lock);
+	smp_mb__after_unlock_lock();
 	gp_duration = jiffies - rsp->gp_start;
 	if (gp_duration > rsp->gp_max)
 		rsp->gp_max = gp_duration;
@@ -1505,16 +1545,19 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
 	 */
 	rcu_for_each_node_breadth_first(rsp, rnp) {
 		raw_spin_lock_irq(&rnp->lock);
+		smp_mb__after_unlock_lock();
 		ACCESS_ONCE(rnp->completed) = rsp->gpnum;
 		rdp = this_cpu_ptr(rsp->rda);
 		if (rnp == rdp->mynode)
 			__note_gp_changes(rsp, rnp, rdp);
+		/* smp_mb() provided by prior unlock-lock pair. */
 		nocb += rcu_future_gp_cleanup(rsp, rnp);
 		raw_spin_unlock_irq(&rnp->lock);
 		cond_resched();
 	}
 	rnp = rcu_get_root(rsp);
 	raw_spin_lock_irq(&rnp->lock);
+	smp_mb__after_unlock_lock();
 	rcu_nocb_gp_set(rnp, nocb);
 
 	rsp->completed = rsp->gpnum; /* Declare grace period done. */
@@ -1553,6 +1596,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 			wait_event_interruptible(rsp->gp_wq,
 						 ACCESS_ONCE(rsp->gp_flags) &
 						 RCU_GP_FLAG_INIT);
+			/* Locking provides needed memory barrier. */
 			if (rcu_gp_init(rsp))
 				break;
 			cond_resched();
@@ -1582,6 +1626,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 					(!ACCESS_ONCE(rnp->qsmask) &&
 					 !rcu_preempt_blocked_readers_cgp(rnp)),
 					j);
+			/* Locking provides needed memory barriers. */
 			/* If grace period done, leave loop. */
 			if (!ACCESS_ONCE(rnp->qsmask) &&
 			    !rcu_preempt_blocked_readers_cgp(rnp))
@@ -1749,6 +1794,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
 		rnp_c = rnp;
 		rnp = rnp->parent;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
+		smp_mb__after_unlock_lock();
 		WARN_ON_ONCE(rnp_c->qsmask);
 	}
 
@@ -1778,6 +1824,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
 
 	rnp = rdp->mynode;
 	raw_spin_lock_irqsave(&rnp->lock, flags);
+	smp_mb__after_unlock_lock();
 	if (rdp->passed_quiesce == 0 || rdp->gpnum != rnp->gpnum ||
 	    rnp->completed == rnp->gpnum) {
 
@@ -1901,13 +1948,13 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
  * Adopt the RCU callbacks from the specified rcu_state structure's
  * orphanage.  The caller must hold the ->orphan_lock.
  */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 {
 	int i;
 	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
 	/* No-CBs CPUs are handled specially. */
-	if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
+	if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
 		return;
 
 	/* Do the accounting first. */
@@ -1986,12 +2033,13 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
 	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
 	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
-	rcu_adopt_orphan_cbs(rsp);
+	rcu_adopt_orphan_cbs(rsp, flags);
 
 	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
 	mask = rdp->grpmask;	/* rnp->grplo is constant. */
 	do {
 		raw_spin_lock(&rnp->lock);	/* irqs already disabled. */
+		smp_mb__after_unlock_lock();
 		rnp->qsmaskinit &= ~mask;
 		if (rnp->qsmaskinit != 0) {
 			if (rnp != rdp->mynode)
@@ -2202,6 +2250,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
 		cond_resched();
 		mask = 0;
 		raw_spin_lock_irqsave(&rnp->lock, flags);
+		smp_mb__after_unlock_lock();
 		if (!rcu_gp_in_progress(rsp)) {
 			raw_spin_unlock_irqrestore(&rnp->lock, flags);
 			return;
@@ -2231,6 +2280,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
 	rnp = rcu_get_root(rsp);
 	if (rnp->qsmask == 0) {
 		raw_spin_lock_irqsave(&rnp->lock, flags);
+		smp_mb__after_unlock_lock();
 		rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */
 	}
 }
@@ -2263,6 +2313,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
 
 	/* Reached the root of the rcu_node tree, acquire lock. */
 	raw_spin_lock_irqsave(&rnp_old->lock, flags);
+	smp_mb__after_unlock_lock();
 	raw_spin_unlock(&rnp_old->fqslock);
 	if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
 		rsp->n_force_qs_lh++;
@@ -2303,6 +2354,9 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	/* If there are callbacks ready, invoke them. */
 	if (cpu_has_callbacks_ready_to_invoke(rdp))
 		invoke_rcu_callbacks(rsp, rdp);
+
+	/* Do any needed deferred wakeups of rcuo kthreads. */
+	do_nocb_deferred_wakeup(rdp);
 }
 
 /*
@@ -2378,6 +2432,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 			struct rcu_node *rnp_root = rcu_get_root(rsp);
 
 			raw_spin_lock(&rnp_root->lock);
+			smp_mb__after_unlock_lock();
 			rcu_start_gp(rsp);
 			raw_spin_unlock(&rnp_root->lock);
 		} else {
@@ -2437,7 +2492,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 
 		if (cpu != -1)
 			rdp = per_cpu_ptr(rsp->rda, cpu);
-		offline = !__call_rcu_nocb(rdp, head, lazy);
+		offline = !__call_rcu_nocb(rdp, head, lazy, flags);
 		WARN_ON_ONCE(offline);
 		/* _call_rcu() is illegal on offline CPU; leak the callback. */
 		local_irq_restore(flags);
@@ -2757,6 +2812,10 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 	/* Check for CPU stalls, if enabled. */
 	check_cpu_stall(rsp, rdp);
 
+	/* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */
+	if (rcu_nohz_full_cpu(rsp))
+		return 0;
+
 	/* Is the RCU core waiting for a quiescent state from this CPU? */
 	if (rcu_scheduler_fully_active &&
 	    rdp->qs_pending && !rdp->passed_quiesce) {
@@ -2790,6 +2849,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 		return 1;
 	}
 
+	/* Does this CPU need a deferred NOCB wakeup? */
+	if (rcu_nocb_need_deferred_wakeup(rdp)) {
+		rdp->n_rp_nocb_defer_wakeup++;
+		return 1;
+	}
+
 	/* nothing to do */
 	rdp->n_rp_need_nothing++;
 	return 0;
@@ -3214,9 +3279,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
 	int i;
 
-	for (i = rcu_num_lvls - 1; i > 0; i--)
+	rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
+	for (i = rcu_num_lvls - 2; i >= 0; i--)
 		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-	rsp->levelspread[0] = rcu_fanout_leaf;
 }
 #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
@@ -3346,6 +3411,8 @@ static void __init rcu_init_geometry(void)
 	if (rcu_fanout_leaf == CONFIG_RCU_FANOUT_LEAF &&
 	    nr_cpu_ids == NR_CPUS)
 		return;
+	pr_info("RCU: Adjusting geometry for rcu_fanout_leaf=%d, nr_cpu_ids=%d\n",
+		rcu_fanout_leaf, nr_cpu_ids);
 
 	/*
 	 * Compute number of nodes that can be handled an rcu_node tree
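Note on the recurring changes above: besides pairing each rcu_node lock acquisition with smp_mb__after_unlock_lock(), the patch defers rcuo kthread wakeups that cannot be issued safely from the calling context and performs them later via do_nocb_deferred_wakeup(). What follows is a minimal user-space sketch of that deferred-wakeup idea only, not kernel code; it assumes pthreads and C11 atomics as stand-ins, and every name in it (pending_cbs, rcuo_kthread, post_callback_no_wake, and so on) is illustrative rather than taken from the kernel sources.

/*
 * Illustrative sketch only -- not kernel code.  A callback poster runs in a
 * context where waking the worker directly is assumed to be unsafe, so it
 * merely records that a wakeup is owed; a later call from a safe context
 * performs the actual wakeup.  Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t wake_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wake_cv = PTHREAD_COND_INITIALIZER;
static atomic_bool defer_wakeup;	/* stand-in for rdp->nocb_defer_wakeup */
static atomic_int pending_cbs;		/* callbacks queued for the worker */

/* Stand-in for the rcuo kthread that invokes queued callbacks. */
static void *rcuo_kthread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&wake_lock);
	while (atomic_load(&pending_cbs) == 0)
		pthread_cond_wait(&wake_cv, &wake_lock);
	pthread_mutex_unlock(&wake_lock);
	printf("worker: invoking %d callback(s)\n", atomic_load(&pending_cbs));
	return NULL;
}

/* Post a callback from a context where an immediate wakeup is unsafe. */
static void post_callback_no_wake(void)
{
	atomic_fetch_add(&pending_cbs, 1);
	atomic_store(&defer_wakeup, true);	/* remember to wake up later */
}

/* Later, from a safe context, honor any deferred wakeup request. */
static void do_deferred_wakeup(void)
{
	if (atomic_exchange(&defer_wakeup, false)) {
		pthread_mutex_lock(&wake_lock);
		pthread_cond_signal(&wake_cv);
		pthread_mutex_unlock(&wake_lock);
	}
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, rcuo_kthread, NULL);
	post_callback_no_wake();	/* e.g. called with irqs disabled */
	do_deferred_wakeup();		/* e.g. from softirq or idle entry */
	pthread_join(tid, NULL);
	return 0;
}

The wakeup is only ever issued from do_deferred_wakeup(), mirroring how the patch routes deferred wakeups through __rcu_process_callbacks() and the idle-entry path rather than waking the kthread at callback-posting time.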