Diffstat (limited to 'kernel/rcu/tree_plugin.h')
-rw-r--r--	kernel/rcu/tree_plugin.h | 276
1 file changed, 74 insertions(+), 202 deletions(-)
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3ec85cb5d544..0d7bbe3095ad 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -34,10 +34,6 @@
 #include "../locking/rtmutex_common.h"
 
-/* rcuc/rcub kthread realtime priority */
-static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
-module_param(kthread_prio, int, 0644);
-
 /*
  * Control variables for per-CPU and per-rcu_node kthreads.  These
  * handle all flavors of RCU.
  */
@@ -53,7 +49,6 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work);
 static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
 static bool have_rcu_nocb_mask;	    /* Was rcu_nocb_mask allocated? */
 static bool __read_mostly rcu_nocb_poll;    /* Offload kthread are to poll. */
-static char __initdata nocb_buf[NR_CPUS * 5];
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
 /*
@@ -103,6 +98,8 @@ RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
 static struct rcu_state *rcu_state_p = &rcu_preempt_state;
 
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
+			       bool wake);
 
 /*
  * Tell them what RCU they are running.
@@ -114,25 +111,6 @@ static void __init rcu_bootup_announce(void)
 }
 
 /*
- * Return the number of RCU-preempt batches processed thus far
- * for debug and statistics.
- */
-static long rcu_batches_completed_preempt(void)
-{
-	return rcu_preempt_state.completed;
-}
-EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
-
-/*
- * Return the number of RCU batches processed thus far for debug & stats.
- */
-long rcu_batches_completed(void)
-{
-	return rcu_batches_completed_preempt();
-}
-EXPORT_SYMBOL_GPL(rcu_batches_completed);
-
-/*
  * Record a preemptible-RCU quiescent state for the specified CPU.  Note
  * that this just means that the task currently running on the CPU is
  * not in a quiescent state.  There might be any number of tasks blocked
@@ -307,15 +285,25 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
 }
 
 /*
+ * Return true if the specified rcu_node structure has tasks that were
+ * preempted within an RCU read-side critical section.
+ */
+static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
+{
+	return !list_empty(&rnp->blkd_tasks);
+}
+
+/*
  * Handle special cases during rcu_read_unlock(), such as needing to
  * notify RCU core processing or task having blocked during the RCU
  * read-side critical section.
  */
 void rcu_read_unlock_special(struct task_struct *t)
 {
-	int empty;
-	int empty_exp;
-	int empty_exp_now;
+	bool empty;
+	bool empty_exp;
+	bool empty_norm;
+	bool empty_exp_now;
 	unsigned long flags;
 	struct list_head *np;
 #ifdef CONFIG_RCU_BOOST
@@ -367,7 +355,8 @@ void rcu_read_unlock_special(struct task_struct *t)
 				break;
 			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 		}
-		empty = !rcu_preempt_blocked_readers_cgp(rnp);
+		empty = !rcu_preempt_has_tasks(rnp);
+		empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
 		empty_exp = !rcu_preempted_readers_exp(rnp);
 		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
 		np = rcu_next_node_entry(t, rnp);
@@ -387,13 +376,21 @@ void rcu_read_unlock_special(struct task_struct *t)
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
 		/*
+		 * If this was the last task on the list, go see if we
+		 * need to propagate ->qsmaskinit bit clearing up the
+		 * rcu_node tree.
+		 */
+		if (!empty && !rcu_preempt_has_tasks(rnp))
+			rcu_cleanup_dead_rnp(rnp);
+
+		/*
 		 * If this was the last task on the current list, and if
 		 * we aren't waiting on any CPUs, report the quiescent state.
 		 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock,
 		 * so we must take a snapshot of the expedited state.
 		 */
 		empty_exp_now = !rcu_preempted_readers_exp(rnp);
-		if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
+		if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
 			trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
 							 rnp->gpnum,
 							 0, rnp->qsmask,
@@ -408,10 +405,8 @@ void rcu_read_unlock_special(struct task_struct *t)
 
 #ifdef CONFIG_RCU_BOOST
 		/* Unboost if we were boosted. */
-		if (drop_boost_mutex) {
+		if (drop_boost_mutex)
 			rt_mutex_unlock(&rnp->boost_mtx);
-			complete(&rnp->boost_completion);
-		}
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
 		/*
@@ -519,99 +514,13 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 {
 	WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
-	if (!list_empty(&rnp->blkd_tasks))
+	if (rcu_preempt_has_tasks(rnp))
 		rnp->gp_tasks = rnp->blkd_tasks.next;
 	WARN_ON_ONCE(rnp->qsmask);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-/*
- * Handle tasklist migration for case in which all CPUs covered by the
- * specified rcu_node have gone offline.  Move them up to the root
- * rcu_node.  The reason for not just moving them to the immediate
- * parent is to remove the need for rcu_read_unlock_special() to
- * make more than two attempts to acquire the target rcu_node's lock.
- * Returns true if there were tasks blocking the current RCU grace
- * period.
- *
- * Returns 1 if there was previously a task blocking the current grace
- * period on the specified rcu_node structure.
- *
- * The caller must hold rnp->lock with irqs disabled.
- */
-static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
-				     struct rcu_node *rnp,
-				     struct rcu_data *rdp)
-{
-	struct list_head *lp;
-	struct list_head *lp_root;
-	int retval = 0;
-	struct rcu_node *rnp_root = rcu_get_root(rsp);
-	struct task_struct *t;
-
-	if (rnp == rnp_root) {
-		WARN_ONCE(1, "Last CPU thought to be offlined?");
-		return 0;  /* Shouldn't happen: at least one CPU online. */
-	}
-
-	/* If we are on an internal node, complain bitterly. */
-	WARN_ON_ONCE(rnp != rdp->mynode);
-
-	/*
-	 * Move tasks up to root rcu_node.  Don't try to get fancy for
-	 * this corner-case operation -- just put this node's tasks
-	 * at the head of the root node's list, and update the root node's
-	 * ->gp_tasks and ->exp_tasks pointers to those of this node's,
-	 * if non-NULL.  This might result in waiting for more tasks than
-	 * absolutely necessary, but this is a good performance/complexity
-	 * tradeoff.
-	 */
-	if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
-		retval |= RCU_OFL_TASKS_NORM_GP;
-	if (rcu_preempted_readers_exp(rnp))
-		retval |= RCU_OFL_TASKS_EXP_GP;
-	lp = &rnp->blkd_tasks;
-	lp_root = &rnp_root->blkd_tasks;
-	while (!list_empty(lp)) {
-		t = list_entry(lp->next, typeof(*t), rcu_node_entry);
-		raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
-		smp_mb__after_unlock_lock();
-		list_del(&t->rcu_node_entry);
-		t->rcu_blocked_node = rnp_root;
-		list_add(&t->rcu_node_entry, lp_root);
-		if (&t->rcu_node_entry == rnp->gp_tasks)
-			rnp_root->gp_tasks = rnp->gp_tasks;
-		if (&t->rcu_node_entry == rnp->exp_tasks)
-			rnp_root->exp_tasks = rnp->exp_tasks;
-#ifdef CONFIG_RCU_BOOST
-		if (&t->rcu_node_entry == rnp->boost_tasks)
-			rnp_root->boost_tasks = rnp->boost_tasks;
-#endif /* #ifdef CONFIG_RCU_BOOST */
-		raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
-	}
-
-	rnp->gp_tasks = NULL;
-	rnp->exp_tasks = NULL;
-#ifdef CONFIG_RCU_BOOST
-	rnp->boost_tasks = NULL;
-	/*
-	 * In case root is being boosted and leaf was not.  Make sure
-	 * that we boost the tasks blocking the current grace period
-	 * in this case.
-	 */
-	raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
-	smp_mb__after_unlock_lock();
-	if (rnp_root->boost_tasks != NULL &&
-	    rnp_root->boost_tasks != rnp_root->gp_tasks &&
-	    rnp_root->boost_tasks != rnp_root->exp_tasks)
-		rnp_root->boost_tasks = rnp_root->gp_tasks;
-	raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-	return retval;
-}
-
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
 /*
@@ -771,7 +680,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	smp_mb__after_unlock_lock();
-	if (list_empty(&rnp->blkd_tasks)) {
+	if (!rcu_preempt_has_tasks(rnp)) {
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	} else {
 		rnp->exp_tasks = rnp->blkd_tasks.next;
@@ -933,15 +842,6 @@ static void __init rcu_bootup_announce(void)
 }
 
 /*
- * Return the number of RCU batches processed thus far for debug & stats.
- */
-long rcu_batches_completed(void)
-{
-	return rcu_batches_completed_sched();
-}
-EXPORT_SYMBOL_GPL(rcu_batches_completed);
-
-/*
  * Because preemptible RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
  */
@@ -960,11 +860,12 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-/* Because preemptible RCU does not exist, no quieting of tasks. */
-static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
-	__releases(rnp->lock)
+/*
+ * Because there is no preemptible RCU, there can be no readers blocked.
+ */
+static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
 {
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	return false;
 }
 
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -996,23 +897,6 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 	WARN_ON_ONCE(rnp->qsmask);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Because preemptible RCU does not exist, it never needs to migrate
- * tasks that were blocked within RCU read-side critical sections, and
- * such non-existent tasks cannot possibly have been blocking the current
- * grace period.
- */
-static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
-				     struct rcu_node *rnp,
-				     struct rcu_data *rdp)
-{
-	return 0;
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Because preemptible RCU does not exist, it never has any callbacks
  * to check.
@@ -1031,20 +915,6 @@ void synchronize_rcu_expedited(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Because preemptible RCU does not exist, there is never any need to
- * report on tasks preempted in RCU read-side critical sections during
- * expedited RCU grace periods.
- */
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
-			       bool wake)
-{
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Because preemptible RCU does not exist, rcu_barrier() is just
  * another name for rcu_barrier_sched().
@@ -1080,7 +950,7 @@ void exit_rcu(void)
 
 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
 {
-	if (list_empty(&rnp->blkd_tasks))
+	if (!rcu_preempt_has_tasks(rnp))
 		rnp->n_balk_blkd_tasks++;
 	else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
 		rnp->n_balk_exp_gp_tasks++;
@@ -1127,7 +997,8 @@ static int rcu_boost(struct rcu_node *rnp)
 	struct task_struct *t;
 	struct list_head *tb;
 
-	if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
+	if (ACCESS_ONCE(rnp->exp_tasks) == NULL &&
+	    ACCESS_ONCE(rnp->boost_tasks) == NULL)
 		return 0;  /* Nothing left to boost. */
 
 	raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -1175,15 +1046,11 @@ static int rcu_boost(struct rcu_node *rnp)
 	 */
 	t = container_of(tb, struct task_struct, rcu_node_entry);
 	rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
-	init_completion(&rnp->boost_completion);
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	/* Lock only for side effect: boosts task t's priority. */
 	rt_mutex_lock(&rnp->boost_mtx);
 	rt_mutex_unlock(&rnp->boost_mtx);  /* Then keep lockdep happy. */
 
-	/* Wait for boostee to be done w/boost_mtx before reinitializing. */
-	wait_for_completion(&rnp->boost_completion);
-
 	return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
 	       ACCESS_ONCE(rnp->boost_tasks) != NULL;
 }
@@ -1416,12 +1283,8 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
 		if ((mask & 0x1) && cpu != outgoingcpu)
 			cpumask_set_cpu(cpu, cm);
-	if (cpumask_weight(cm) == 0) {
+	if (cpumask_weight(cm) == 0)
 		cpumask_setall(cm);
-		for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
-			cpumask_clear_cpu(cpu, cm);
-		WARN_ON_ONCE(cpumask_weight(cm) == 0);
-	}
 	set_cpus_allowed_ptr(t, cm);
 	free_cpumask_var(cm);
 }
@@ -1446,12 +1309,8 @@ static void __init rcu_spawn_boost_kthreads(void)
 	for_each_possible_cpu(cpu)
 		per_cpu(rcu_cpu_has_work, cpu) = 0;
 	BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
-	rnp = rcu_get_root(rcu_state_p);
-	(void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
-	if (NUM_RCU_NODES > 1) {
-		rcu_for_each_leaf_node(rcu_state_p, rnp)
-			(void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
-	}
+	rcu_for_each_leaf_node(rcu_state_p, rnp)
+		(void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
 }
 
 static void rcu_prepare_kthreads(int cpu)
@@ -1605,7 +1464,8 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
 		 * completed since we last checked and there are
 		 * callbacks not yet ready to invoke.
 		 */
-		if (rdp->completed != rnp->completed &&
+		if ((rdp->completed != rnp->completed ||
+		     unlikely(ACCESS_ONCE(rdp->gpwrap))) &&
 		    rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
 			note_gp_changes(rsp, rdp);
 
@@ -1898,11 +1758,12 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
 		ticks_value = rsp->gpnum - rdp->gpnum;
 	}
 	print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
-	pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n",
+	pr_err("\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u fqs=%ld %s\n",
	       cpu, ticks_value, ticks_title,
	       atomic_read(&rdtp->dynticks) & 0xfff,
	       rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
	       rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
+	       ACCESS_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart,
	       fast_no_hz);
 }
 
@@ -2056,9 +1917,26 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
 static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
 {
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+	unsigned long ret;
+#ifdef CONFIG_PROVE_RCU
 	struct rcu_head *rhp;
+#endif /* #ifdef CONFIG_PROVE_RCU */
 
-	/* No-CBs CPUs might have callbacks on any of three lists. */
+	/*
+	 * Check count of all no-CBs callbacks awaiting invocation.
+	 * There needs to be a barrier before this function is called,
+	 * but associated with a prior determination that no more
+	 * callbacks would be posted.  In the worst case, the first
+	 * barrier in _rcu_barrier() suffices (but the caller cannot
+	 * necessarily rely on this, not a substitute for the caller
+	 * getting the concurrency design right!).  There must also be
+	 * a barrier between the following load an posting of a callback
+	 * (if a callback is in fact needed).  This is associated with an
+	 * atomic_inc() in the caller.
+	 */
+	ret = atomic_long_read(&rdp->nocb_q_count);
+
+#ifdef CONFIG_PROVE_RCU
 	rhp = ACCESS_ONCE(rdp->nocb_head);
 	if (!rhp)
 		rhp = ACCESS_ONCE(rdp->nocb_gp_head);
@@ -2072,8 +1950,9 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
 		       cpu, rhp->func);
 		WARN_ON_ONCE(1);
 	}
+#endif /* #ifdef CONFIG_PROVE_RCU */
 
-	return !!rhp;
+	return !!ret;
 }
 
 /*
@@ -2095,9 +1974,10 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 	struct task_struct *t;
 
 	/* Enqueue the callback on the nocb list and update counts. */
+	atomic_long_add(rhcount, &rdp->nocb_q_count);
+	/* rcu_barrier() relies on ->nocb_q_count add before xchg. */
 	old_rhpp = xchg(&rdp->nocb_tail, rhtp);
 	ACCESS_ONCE(*old_rhpp) = rhp;
-	atomic_long_add(rhcount, &rdp->nocb_q_count);
 	atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
 	smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */
 
@@ -2288,9 +2168,6 @@ wait_again:
 
 		/* Move callbacks to wait-for-GP list, which is empty. */
 		ACCESS_ONCE(rdp->nocb_head) = NULL;
 		rdp->nocb_gp_tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
-		rdp->nocb_gp_count = atomic_long_xchg(&rdp->nocb_q_count, 0);
-		rdp->nocb_gp_count_lazy =
-			atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
 		gotcbs = true;
 	}
@@ -2338,9 +2215,6 @@ wait_again:
 		/* Append callbacks to follower's "done" list. */
 		tail = xchg(&rdp->nocb_follower_tail, rdp->nocb_gp_tail);
 		*tail = rdp->nocb_gp_head;
-		atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count);
-		atomic_long_add(rdp->nocb_gp_count_lazy,
-				&rdp->nocb_follower_count_lazy);
 		smp_mb__after_atomic(); /* Store *tail before wakeup. */
 		if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
 			/*
@@ -2415,13 +2289,11 @@ static int rcu_nocb_kthread(void *arg)
 		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, "WokeNonEmpty");
 		ACCESS_ONCE(rdp->nocb_follower_head) = NULL;
 		tail = xchg(&rdp->nocb_follower_tail, &rdp->nocb_follower_head);
-		c = atomic_long_xchg(&rdp->nocb_follower_count, 0);
-		cl = atomic_long_xchg(&rdp->nocb_follower_count_lazy, 0);
-		rdp->nocb_p_count += c;
-		rdp->nocb_p_count_lazy += cl;
 
 		/* Each pass through the following loop invokes a callback. */
-		trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
+		trace_rcu_batch_start(rdp->rsp->name,
+				      atomic_long_read(&rdp->nocb_q_count_lazy),
+				      atomic_long_read(&rdp->nocb_q_count), -1);
 		c = cl = 0;
 		while (list) {
 			next = list->next;
@@ -2443,9 +2315,9 @@ static int rcu_nocb_kthread(void *arg)
 			list = next;
 		}
 		trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
-		ACCESS_ONCE(rdp->nocb_p_count) = rdp->nocb_p_count - c;
-		ACCESS_ONCE(rdp->nocb_p_count_lazy) =
-						rdp->nocb_p_count_lazy - cl;
+		smp_mb__before_atomic();  /* _add after CB invocation. */
+		atomic_long_add(-c, &rdp->nocb_q_count);
+		atomic_long_add(-cl, &rdp->nocb_q_count_lazy);
 		rdp->n_nocbs_invoked += c;
 	}
 	return 0;
@@ -2513,8 +2385,8 @@ void __init rcu_init_nohz(void)
 		cpumask_and(rcu_nocb_mask, cpu_possible_mask,
			    rcu_nocb_mask);
 	}
-	cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
-	pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
+	pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n",
+		cpumask_pr_args(rcu_nocb_mask));
 	if (rcu_nocb_poll)
 		pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
 
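The most widespread change above replaces open-coded !list_empty(&rnp->blkd_tasks) tests with the new rcu_preempt_has_tasks() predicate, whose !CONFIG_PREEMPT_RCU variant simply returns false so callers need no #ifdefs. A minimal userspace sketch of the pattern, with RCU_PREEMPT standing in for the kernel config option and the list types reduced to the bare minimum:

#include <stdbool.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name) { &(name), &(name) }

static bool list_empty(const struct list_head *head)
{
	return head->next == head;
}

struct rcu_node { struct list_head blkd_tasks; };

#ifdef RCU_PREEMPT
/* Preemptible RCU: blocked readers may be queued on the node. */
static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
{
	return !list_empty(&rnp->blkd_tasks);
}
#else
/* No preemptible RCU: a reader can never block, so never be queued. */
static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
{
	return false;
}
#endif

int main(void)
{
	struct rcu_node rnp = { .blkd_tasks = LIST_HEAD_INIT(rnp.blkd_tasks) };

	/* Call sites now read the same with or without preemptible RCU. */
	printf("has tasks: %d\n", rcu_preempt_has_tasks(&rnp));
	return 0;
}

Besides readability, the named predicate lets rcu_read_unlock_special() distinguish "list is now empty" (propagate ->qsmaskinit clearing via rcu_cleanup_dead_rnp()) from "no readers block the current grace period" (empty_norm), which is why the single empty flag becomes two.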
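In rcu_boost(), the ->exp_tasks/->boost_tasks test before taking ->lock now uses ACCESS_ONCE(), making the unlocked fast path explicit: the racy reads are only a hint, and any decision to boost is made under the lock. A sketch of that check-then-recheck shape, using C11 relaxed atomic loads where the kernel uses ACCESS_ONCE(); the pthread mutex and boost_one() name are illustrative, not kernel APIs:

#include <stdatomic.h>
#include <stdbool.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t node_lock = PTHREAD_MUTEX_INITIALIZER;
static void *_Atomic exp_tasks;   /* stands in for rnp->exp_tasks */
static void *_Atomic boost_tasks; /* stands in for rnp->boost_tasks */

static bool boost_one(void)
{
	/*
	 * Unlocked fast path: stale values only cause a harmless skip
	 * or a harmless lock acquisition.  ACCESS_ONCE() in the kernel
	 * similarly keeps the compiler from fusing or re-reading the
	 * loads, without ordering guarantees.
	 */
	if (atomic_load_explicit(&exp_tasks, memory_order_relaxed) == NULL &&
	    atomic_load_explicit(&boost_tasks, memory_order_relaxed) == NULL)
		return false;	/* Nothing left to boost. */

	pthread_mutex_lock(&node_lock);
	/* Recheck under the lock before acting on the hint. */
	if (atomic_load_explicit(&exp_tasks, memory_order_relaxed) == NULL &&
	    atomic_load_explicit(&boost_tasks, memory_order_relaxed) == NULL) {
		pthread_mutex_unlock(&node_lock);
		return false;
	}
	/* ...select a blocked task and proxy-boost it here... */
	pthread_mutex_unlock(&node_lock);
	return true;
}

int main(void)
{
	printf("boosted: %d\n", boost_one());	/* 0: nothing queued */
	return 0;
}

The same hunk drops boost_completion: once the boostee unlocks ->boost_mtx via rt_mutex_unlock(), the rt_mutex machinery itself is sufficient, so the extra completion-based handshake before reinitialization is no longer needed.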
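The no-CBs hunks in the second half collapse the per-stage counters (->nocb_gp_count, ->nocb_follower_count, and their lazy variants) into the single ->nocb_q_count: the count is added before a callback is published (the add-before-xchg comment) and subtracted only after the callback has been invoked, so one atomic read in rcu_nocb_cpu_needs_barrier() reliably answers "is work still pending?". A userspace sketch of that counter protocol with C11 atomics in place of atomic_long_t; enqueue_cbs(), invoke_cbs(), and needs_barrier() are illustrative names, not kernel APIs:

#include <stdatomic.h>
#include <stdio.h>

static atomic_long queue_count;	/* stands in for rdp->nocb_q_count */

/*
 * Enqueue: count the callbacks *before* publishing them on the list,
 * so a barrier-side reader that can still observe the callbacks also
 * observes a nonzero count.
 */
static void enqueue_cbs(long n)
{
	atomic_fetch_add(&queue_count, n);
	/* ...then publish (the kernel's xchg of ->nocb_tail). */
}

/*
 * Invoke: decrement only after the callbacks have actually run, so a
 * nonzero count always means "possibly still pending".
 */
static void invoke_cbs(long n)
{
	/* ...run the callbacks here... */
	atomic_fetch_sub(&queue_count, n); /* kernel: atomic_long_add(-c, ...) */
}

/* Barrier-side check: a single read covers every queue stage. */
static int needs_barrier(void)
{
	return atomic_load(&queue_count) != 0;
}

int main(void)
{
	enqueue_cbs(3);
	printf("pending: %d\n", needs_barrier());	/* 1 */
	invoke_cbs(3);
	printf("pending: %d\n", needs_barrier());	/* 0 */
	return 0;
}

With one conservative counter, the leader/follower handoff no longer has to move counts between stages with atomic_long_xchg(); the CONFIG_PROVE_RCU-only list walk remains purely as a debug cross-check.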
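Finally, the NR_CPUS-sized nocb_buf and the cpulist_scnprintf() call give way to printing the cpumask directly through the kernel's %*pbl format extension with cpumask_pr_args(), eliminating the static buffer. Userspace printf() has no %*pbl, but the ranged list it produces (for example "0-3,7") is easy to model; print_cpulist() below is a hypothetical stand-in for illustration only:

#include <stdio.h>

/* Print the set bits of @mask as a cpulist-style ranged list, e.g. "0-3,7". */
static void print_cpulist(unsigned long mask, int nbits)
{
	const char *sep = "";

	for (int bit = 0; bit < nbits; bit++) {
		if (!(mask & (1UL << bit)))
			continue;
		int start = bit;
		/* Extend the run while consecutive bits are set. */
		while (bit + 1 < nbits && (mask & (1UL << (bit + 1))))
			bit++;
		if (bit > start)
			printf("%s%d-%d", sep, start, bit);
		else
			printf("%s%d", sep, start);
		sep = ",";
	}
	putchar('\n');
}

int main(void)
{
	print_cpulist(0x8FUL, 32);	/* bits 0-3 and 7 -> "0-3,7" */
	return 0;
}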