Diffstat (limited to 'kernel/rcu/tasks.h')
-rw-r--r--  kernel/rcu/tasks.h  476
1 file changed, 394 insertions, 82 deletions
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 7da3c81c3f59..84f1d91604cc 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -6,6 +6,7 @@
  */
 
 #ifdef CONFIG_TASKS_RCU_GENERIC
+#include "rcu_segcblist.h"
 
 ////////////////////////////////////////////////////////////////////////
 //
@@ -20,11 +21,33 @@ typedef void (*holdouts_func_t)(struct list_head *hop, bool ndrpt, bool *frptp);
 typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
 
 /**
+ * struct rcu_tasks_percpu - Per-CPU component of definition for a Tasks-RCU-like mechanism.
+ * @cblist: Callback list.
+ * @lock: Lock protecting per-CPU callback list.
+ * @rtp_jiffies: Jiffies counter value for statistics.
+ * @rtp_n_lock_retries: Rough lock-contention statistic.
+ * @rtp_work: Work queue for invoking callbacks.
+ * @rtp_irq_work: IRQ work queue for deferred wakeups.
+ * @barrier_q_head: RCU callback for barrier operation.
+ * @cpu: CPU number corresponding to this entry.
+ * @rtpp: Pointer to the rcu_tasks structure.
+ */
+struct rcu_tasks_percpu {
+	struct rcu_segcblist cblist;
+	raw_spinlock_t __private lock;
+	unsigned long rtp_jiffies;
+	unsigned long rtp_n_lock_retries;
+	struct work_struct rtp_work;
+	struct irq_work rtp_irq_work;
+	struct rcu_head barrier_q_head;
+	int cpu;
+	struct rcu_tasks *rtpp;
+};
+
+/**
  * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism.
- * @cbs_head: Head of callback list.
- * @cbs_tail: Tail pointer for callback list.
  * @cbs_wq: Wait queue allowing new callback to get kthread's attention.
- * @cbs_lock: Lock protecting callback list.
+ * @cbs_gbl_lock: Lock protecting callback list.
  * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
  * @gp_func: This flavor's grace-period-wait function.
  * @gp_state: Grace period's most recent state transition (debugging).
@@ -32,7 +55,7 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
  * @init_fract: Initial backoff sleep interval.
  * @gp_jiffies: Time of last @gp_state transition.
  * @gp_start: Most recent grace-period start in jiffies.
- * @n_gps: Number of grace periods completed since boot.
+ * @tasks_gp_seq: Number of grace periods completed since boot.
  * @n_ipis: Number of IPIs sent to encourage grace periods to end.
  * @n_ipis_fails: Number of IPI-send failures.
  * @pregp_func: This flavor's pre-grace-period function (optional).
@@ -41,20 +64,27 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
  * @holdouts_func: This flavor's holdout-list scan function (optional).
  * @postgp_func: This flavor's post-grace-period function (optional).
  * @call_func: This flavor's call_rcu()-equivalent function.
+ * @rtpcpu: This flavor's rcu_tasks_percpu structure.
+ * @percpu_enqueue_shift: Shift down CPU ID this much when enqueuing callbacks.
+ * @percpu_enqueue_lim: Number of per-CPU callback queues in use for enqueuing.
+ * @percpu_dequeue_lim: Number of per-CPU callback queues in use for dequeuing.
+ * @percpu_dequeue_gpseq: RCU grace-period number to propagate enqueue limit to dequeuers.
+ * @barrier_q_mutex: Serialize barrier operations.
+ * @barrier_q_count: Number of queues being waited on.
+ * @barrier_q_completion: Barrier wait/wakeup mechanism.
+ * @barrier_q_seq: Sequence number for barrier operations.
  * @name: This flavor's textual name.
  * @kname: This flavor's kthread name.
  */
 struct rcu_tasks {
-	struct rcu_head *cbs_head;
-	struct rcu_head **cbs_tail;
 	struct wait_queue_head cbs_wq;
-	raw_spinlock_t cbs_lock;
+	raw_spinlock_t cbs_gbl_lock;
 	int gp_state;
 	int gp_sleep;
 	int init_fract;
 	unsigned long gp_jiffies;
 	unsigned long gp_start;
-	unsigned long n_gps;
+	unsigned long tasks_gp_seq;
 	unsigned long n_ipis;
 	unsigned long n_ipis_fails;
 	struct task_struct *kthread_ptr;
@@ -65,20 +95,40 @@ struct rcu_tasks {
 	holdouts_func_t holdouts_func;
 	postgp_func_t postgp_func;
 	call_rcu_func_t call_func;
+	struct rcu_tasks_percpu __percpu *rtpcpu;
+	int percpu_enqueue_shift;
+	int percpu_enqueue_lim;
+	int percpu_dequeue_lim;
+	unsigned long percpu_dequeue_gpseq;
+	struct mutex barrier_q_mutex;
+	atomic_t barrier_q_count;
+	struct completion barrier_q_completion;
+	unsigned long barrier_q_seq;
 	char *name;
 	char *kname;
 };
 
-#define DEFINE_RCU_TASKS(rt_name, gp, call, n)				\
-static struct rcu_tasks rt_name =					\
-{									\
-	.cbs_tail = &rt_name.cbs_head,					\
-	.cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq),	\
-	.cbs_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_lock),		\
-	.gp_func = gp,							\
-	.call_func = call,						\
-	.name = n,							\
-	.kname = #rt_name,						\
+static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp);
+
+#define DEFINE_RCU_TASKS(rt_name, gp, call, n)						\
+static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = {		\
+	.lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock),		\
+	.rtp_irq_work = IRQ_WORK_INIT(call_rcu_tasks_iw_wakeup),			\
+};											\
+static struct rcu_tasks rt_name =							\
+{											\
+	.cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq),			\
+	.cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock),			\
+	.gp_func = gp,									\
+	.call_func = call,								\
+	.rtpcpu = &rt_name ## __percpu,							\
+	.name = n,									\
+	.percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS),					\
+	.percpu_enqueue_lim = 1,							\
+	.percpu_dequeue_lim = 1,							\
+	.barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex),		\
+	.barrier_q_seq = (0UL - 50UL) << RCU_SEQ_CTR_SHIFT,				\
+	.kname = #rt_name,								\
 }
 
 /* Track exiting tasks in order to allow them to be waited for. */
@@ -94,6 +144,15 @@ module_param(rcu_task_ipi_delay, int, 0644);
 static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
 module_param(rcu_task_stall_timeout, int, 0644);
 
+static int rcu_task_enqueue_lim __read_mostly = -1;
+module_param(rcu_task_enqueue_lim, int, 0444);
+
+static bool rcu_task_cb_adjust;
+static int rcu_task_contend_lim __read_mostly = 100;
+module_param(rcu_task_contend_lim, int, 0444);
+static int rcu_task_collapse_lim __read_mostly = 10;
+module_param(rcu_task_collapse_lim, int, 0444);
+
 /* RCU tasks grace-period state for debugging. */
 #define RTGS_INIT		 0
 #define RTGS_WAIT_WAIT_CBS	 1
@@ -128,6 +187,8 @@ static const char * const rcu_tasks_gp_state_names[] = {
 //
 // Generic code.
 
+static void rcu_tasks_invoke_cbs_wq(struct work_struct *wp);
+
 /* Record grace-period phase and time. */
 static void set_tasks_gp_state(struct rcu_tasks *rtp, int newstate)
 {
@@ -148,23 +209,106 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
 }
 #endif /* #ifndef CONFIG_TINY_RCU */
 
+// Initialize per-CPU callback lists for the specified flavor of
+// Tasks RCU.
+static void cblist_init_generic(struct rcu_tasks *rtp)
+{
+	int cpu;
+	unsigned long flags;
+	int lim;
+
+	raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
+	if (rcu_task_enqueue_lim < 0) {
+		rcu_task_enqueue_lim = 1;
+		rcu_task_cb_adjust = true;
+		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
+	} else if (rcu_task_enqueue_lim == 0) {
+		rcu_task_enqueue_lim = 1;
+	}
+	lim = rcu_task_enqueue_lim;
+
+	if (lim > nr_cpu_ids)
+		lim = nr_cpu_ids;
+	WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids / lim));
+	WRITE_ONCE(rtp->percpu_dequeue_lim, lim);
+	smp_store_release(&rtp->percpu_enqueue_lim, lim);
+	for_each_possible_cpu(cpu) {
+		struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
+
+		WARN_ON_ONCE(!rtpcp);
+		if (cpu)
+			raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock));
+		raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
+		if (rcu_segcblist_empty(&rtpcp->cblist))
+			rcu_segcblist_init(&rtpcp->cblist);
+		INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq);
+		rtpcp->cpu = cpu;
+		rtpcp->rtpp = rtp;
+		raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
+	}
+	raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
+	pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
+}
+
+// IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic().
+static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
+{
+	struct rcu_tasks *rtp;
+	struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work);
+
+	rtp = rtpcp->rtpp;
+	wake_up(&rtp->cbs_wq);
+}
+
 // Enqueue a callback for the specified flavor of Tasks RCU.
 static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 				   struct rcu_tasks *rtp)
 {
 	unsigned long flags;
+	unsigned long j;
+	bool needadjust = false;
 	bool needwake;
+	struct rcu_tasks_percpu *rtpcp;
 
 	rhp->next = NULL;
 	rhp->func = func;
-	raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
-	needwake = !rtp->cbs_head;
-	WRITE_ONCE(*rtp->cbs_tail, rhp);
-	rtp->cbs_tail = &rhp->next;
-	raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
+	local_irq_save(flags);
+	rcu_read_lock();
+	rtpcp = per_cpu_ptr(rtp->rtpcpu,
+			    smp_processor_id() >> READ_ONCE(rtp->percpu_enqueue_shift));
+	if (!raw_spin_trylock_rcu_node(rtpcp)) { // irqs already disabled.
+		raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
+		j = jiffies;
+		if (rtpcp->rtp_jiffies != j) {
+			rtpcp->rtp_jiffies = j;
+			rtpcp->rtp_n_lock_retries = 0;
+		}
+		if (rcu_task_cb_adjust && ++rtpcp->rtp_n_lock_retries > rcu_task_contend_lim &&
+		    READ_ONCE(rtp->percpu_enqueue_lim) != nr_cpu_ids)
+			needadjust = true;  // Defer adjustment to avoid deadlock.
+	}
+	if (!rcu_segcblist_is_enabled(&rtpcp->cblist)) {
+		raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
+		cblist_init_generic(rtp);
+		raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
+	}
+	needwake = rcu_segcblist_empty(&rtpcp->cblist);
+	rcu_segcblist_enqueue(&rtpcp->cblist, rhp);
+	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+	if (unlikely(needadjust)) {
+		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
+		if (rtp->percpu_enqueue_lim != nr_cpu_ids) {
+			WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
+			WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids);
+			smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids);
+			pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name);
+		}
+		raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
+	}
+	rcu_read_unlock();
 	/* We can't create the thread unless interrupts are enabled. */
 	if (needwake && READ_ONCE(rtp->kthread_ptr))
-		wake_up(&rtp->cbs_wq);
+		irq_work_queue(&rtpcp->rtp_irq_work);
 }
 
 // Wait for a grace period for the specified flavor of Tasks RCU.
@@ -178,12 +322,173 @@ static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
 	wait_rcu_gp(rtp->call_func);
 }
 
+// RCU callback function for rcu_barrier_tasks_generic().
+static void rcu_barrier_tasks_generic_cb(struct rcu_head *rhp)
+{
+	struct rcu_tasks *rtp;
+	struct rcu_tasks_percpu *rtpcp;
+
+	rtpcp = container_of(rhp, struct rcu_tasks_percpu, barrier_q_head);
+	rtp = rtpcp->rtpp;
+	if (atomic_dec_and_test(&rtp->barrier_q_count))
+		complete(&rtp->barrier_q_completion);
+}
+
+// Wait for all in-flight callbacks for the specified RCU Tasks flavor.
+// Operates in a manner similar to rcu_barrier().
+static void rcu_barrier_tasks_generic(struct rcu_tasks *rtp)
+{
+	int cpu;
+	unsigned long flags;
+	struct rcu_tasks_percpu *rtpcp;
+	unsigned long s = rcu_seq_snap(&rtp->barrier_q_seq);
+
+	mutex_lock(&rtp->barrier_q_mutex);
+	if (rcu_seq_done(&rtp->barrier_q_seq, s)) {
+		smp_mb();
+		mutex_unlock(&rtp->barrier_q_mutex);
+		return;
+	}
+	rcu_seq_start(&rtp->barrier_q_seq);
+	init_completion(&rtp->barrier_q_completion);
+	atomic_set(&rtp->barrier_q_count, 2);
+	for_each_possible_cpu(cpu) {
+		if (cpu >= smp_load_acquire(&rtp->percpu_dequeue_lim))
+			break;
+		rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
+		rtpcp->barrier_q_head.func = rcu_barrier_tasks_generic_cb;
+		raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
+		if (rcu_segcblist_entrain(&rtpcp->cblist, &rtpcp->barrier_q_head))
+			atomic_inc(&rtp->barrier_q_count);
+		raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+	}
+	if (atomic_sub_and_test(2, &rtp->barrier_q_count))
+		complete(&rtp->barrier_q_completion);
+	wait_for_completion(&rtp->barrier_q_completion);
+	rcu_seq_end(&rtp->barrier_q_seq);
+	mutex_unlock(&rtp->barrier_q_mutex);
+}
+
+// Advance callbacks and indicate whether either a grace period or
+// callback invocation is needed.
+static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
+{
+	int cpu;
+	unsigned long flags;
+	long n;
+	long ncbs = 0;
+	long ncbsnz = 0;
+	int needgpcb = 0;
+
+	for (cpu = 0; cpu < smp_load_acquire(&rtp->percpu_dequeue_lim); cpu++) {
+		struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
+
+		/* Advance and accelerate any new callbacks. */
+		if (!rcu_segcblist_n_cbs(&rtpcp->cblist))
+			continue;
+		raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
+		// Should we shrink down to a single callback queue?
+		n = rcu_segcblist_n_cbs(&rtpcp->cblist);
+		if (n) {
+			ncbs += n;
+			if (cpu > 0)
+				ncbsnz += n;
+		}
+		rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
+		(void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq));
+		if (rcu_segcblist_pend_cbs(&rtpcp->cblist))
+			needgpcb |= 0x3;
+		if (!rcu_segcblist_empty(&rtpcp->cblist))
+			needgpcb |= 0x1;
+		raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+	}
+
+	// Shrink down to a single callback queue if appropriate.
+	// This is done in two stages: (1) If there are no more than
+	// rcu_task_collapse_lim callbacks on CPU 0 and none on any other
+	// CPU, limit enqueueing to CPU 0.  (2) After an RCU grace period,
+	// if there has not been an increase in callbacks, limit dequeuing
+	// to CPU 0.  Note the matching RCU read-side critical section in
+	// call_rcu_tasks_generic().
+	if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) {
+		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
+		if (rtp->percpu_enqueue_lim > 1) {
+			WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids));
+			smp_store_release(&rtp->percpu_enqueue_lim, 1);
+			rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu();
+			pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name);
+		}
+		raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
+	}
+	if (rcu_task_cb_adjust && !ncbsnz &&
+	    poll_state_synchronize_rcu(rtp->percpu_dequeue_gpseq)) {
+		raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
+		if (rtp->percpu_enqueue_lim < rtp->percpu_dequeue_lim) {
+			WRITE_ONCE(rtp->percpu_dequeue_lim, 1);
+			pr_info("Completing switch %s to CPU-0 callback queuing.\n", rtp->name);
+		}
+		raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
+	}
+
+	return needgpcb;
+}
+
+// Advance callbacks and invoke any that are ready.
+static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu *rtpcp)
+{
+	int cpu;
+	int cpunext;
+	unsigned long flags;
+	int len;
+	struct rcu_head *rhp;
+	struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl);
+	struct rcu_tasks_percpu *rtpcp_next;
+
+	cpu = rtpcp->cpu;
+	cpunext = cpu * 2 + 1;
+	if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
+		rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext);
+		queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work);
+		cpunext++;
+		if (cpunext < smp_load_acquire(&rtp->percpu_dequeue_lim)) {
+			rtpcp_next = per_cpu_ptr(rtp->rtpcpu, cpunext);
+			queue_work_on(cpunext, system_wq, &rtpcp_next->rtp_work);
+		}
+	}
+
+	if (rcu_segcblist_empty(&rtpcp->cblist))
+		return;
+	raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
+	rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
+	rcu_segcblist_extract_done_cbs(&rtpcp->cblist, &rcl);
+	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+	len = rcl.len;
+	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = rcu_cblist_dequeue(&rcl)) {
+		local_bh_disable();
+		rhp->func(rhp);
+		local_bh_enable();
+		cond_resched();
+	}
+	raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
+	rcu_segcblist_add_len(&rtpcp->cblist, -len);
+	(void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq));
+	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+}
+
+// Workqueue flood to advance callbacks and invoke any that are ready.
+static void rcu_tasks_invoke_cbs_wq(struct work_struct *wp)
+{
+	struct rcu_tasks *rtp;
+	struct rcu_tasks_percpu *rtpcp = container_of(wp, struct rcu_tasks_percpu, rtp_work);
+
+	rtp = rtpcp->rtpp;
+	rcu_tasks_invoke_cbs(rtp, rtpcp);
+}
+
 /* RCU-tasks kthread that detects grace periods and invokes callbacks. */
 static int __noreturn rcu_tasks_kthread(void *arg)
 {
-	unsigned long flags;
-	struct rcu_head *list;
-	struct rcu_head *next;
+	int needgpcb;
 	struct rcu_tasks *rtp = arg;
 
 	/* Run on housekeeping CPUs by default.  Sysadm can move if desired. */
@@ -199,42 +504,22 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 	for (;;) {
 		set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
 
-		/* Pick up any new callbacks. */
-		raw_spin_lock_irqsave(&rtp->cbs_lock, flags);
-		smp_mb__after_spinlock(); // Order updates vs. GP.
-		list = rtp->cbs_head;
-		rtp->cbs_head = NULL;
-		rtp->cbs_tail = &rtp->cbs_head;
-		raw_spin_unlock_irqrestore(&rtp->cbs_lock, flags);
-
 		/* If there were none, wait a bit and start over. */
-		if (!list) {
-			wait_event_interruptible(rtp->cbs_wq,
-						 READ_ONCE(rtp->cbs_head));
-			if (!rtp->cbs_head) {
-				WARN_ON(signal_pending(current));
-				set_tasks_gp_state(rtp, RTGS_WAIT_WAIT_CBS);
-				schedule_timeout_idle(HZ/10);
-			}
-			continue;
+		wait_event_idle(rtp->cbs_wq, (needgpcb = rcu_tasks_need_gpcb(rtp)));
+
+		if (needgpcb & 0x2) {
+			// Wait for one grace period.
+			set_tasks_gp_state(rtp, RTGS_WAIT_GP);
+			rtp->gp_start = jiffies;
+			rcu_seq_start(&rtp->tasks_gp_seq);
+			rtp->gp_func(rtp);
+			rcu_seq_end(&rtp->tasks_gp_seq);
 		}
 
-		// Wait for one grace period.
-		set_tasks_gp_state(rtp, RTGS_WAIT_GP);
-		rtp->gp_start = jiffies;
-		rtp->gp_func(rtp);
-		rtp->n_gps++;
-
-		/* Invoke the callbacks. */
+		/* Invoke callbacks. */
 		set_tasks_gp_state(rtp, RTGS_INVOKE_CBS);
-		while (list) {
-			next = list->next;
-			local_bh_disable();
-			list->func(list);
-			local_bh_enable();
-			list = next;
-			cond_resched();
-		}
+		rcu_tasks_invoke_cbs(rtp, per_cpu_ptr(rtp->rtpcpu, 0));
+
 		/* Paranoid sleep to keep this from entering a tight loop */
 		schedule_timeout_idle(rtp->gp_sleep);
 	}
@@ -279,14 +564,15 @@ static void __init rcu_tasks_bootup_oddness(void)
 /* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */
 static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
 {
+	struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, 0); // for_each...
 	pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
 		rtp->kname,
 		tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
 		jiffies - data_race(rtp->gp_jiffies),
-		data_race(rtp->n_gps),
+		data_race(rcu_seq_current(&rtp->tasks_gp_seq)),
 		data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
 		".k"[!!data_race(rtp->kthread_ptr)],
-		".C"[!!data_race(rtp->cbs_head)],
+		".C"[!data_race(rcu_segcblist_empty(&rtpcp->cblist))],
 		s);
 }
 #endif // #ifndef CONFIG_TINY_RCU
@@ -411,10 +697,10 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
 // exit_tasks_rcu_finish() functions begin and end, respectively, the SRCU
 // read-side critical sections waited for by rcu_tasks_postscan().
 //
-// Pre-grace-period update-side code is ordered before the grace via the
-// ->cbs_lock and the smp_mb__after_spinlock().  Pre-grace-period read-side
-// code is ordered before the grace period via synchronize_rcu() call
-// in rcu_tasks_pregp_step() and by the scheduler's locks and interrupt
+// Pre-grace-period update-side code is ordered before the grace
+// via the raw_spin_lock.*rcu_node().  Pre-grace-period read-side code
+// is ordered before the grace period via synchronize_rcu() call in
+// rcu_tasks_pregp_step() and by the scheduler's locks and interrupt
 // disabling.
 
 /* Pre-grace-period preparation. */
@@ -586,13 +872,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
  */
 void rcu_barrier_tasks(void)
 {
-	/* There is only one callback queue, so this is easy.  ;-) */
-	synchronize_rcu_tasks();
+	rcu_barrier_tasks_generic(&rcu_tasks);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
 
 static int __init rcu_spawn_tasks_kthread(void)
 {
+	cblist_init_generic(&rcu_tasks);
 	rcu_tasks.gp_sleep = HZ / 10;
 	rcu_tasks.init_fract = HZ / 10;
 	rcu_tasks.pregp_func = rcu_tasks_pregp_step;
@@ -724,13 +1010,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_rude);
  */
 void rcu_barrier_tasks_rude(void)
 {
-	/* There is only one callback queue, so this is easy.  ;-) */
-	synchronize_rcu_tasks_rude();
+	rcu_barrier_tasks_generic(&rcu_tasks_rude);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);
 
 static int __init rcu_spawn_tasks_rude_kthread(void)
 {
+	cblist_init_generic(&rcu_tasks_rude);
 	rcu_tasks_rude.gp_sleep = HZ / 10;
 	rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
 	return 0;
@@ -1073,25 +1359,50 @@ static void rcu_tasks_trace_postscan(struct list_head *hop)
 	// Any tasks that exit after this point will set ->trc_reader_checked.
 }
 
+/* Communicate task state back to the RCU tasks trace stall warning request. */
+struct trc_stall_chk_rdr {
+	int nesting;
+	int ipi_to_cpu;
+	u8 needqs;
+};
+
+static int trc_check_slow_task(struct task_struct *t, void *arg)
+{
+	struct trc_stall_chk_rdr *trc_rdrp = arg;
+
+	if (task_curr(t))
+		return false; // It is running, so decline to inspect it.
+	trc_rdrp->nesting = READ_ONCE(t->trc_reader_nesting);
+	trc_rdrp->ipi_to_cpu = READ_ONCE(t->trc_ipi_to_cpu);
+	trc_rdrp->needqs = READ_ONCE(t->trc_reader_special.b.need_qs);
+	return true;
+}
+
 /* Show the state of a task stalling the current RCU tasks trace GP. */
 static void show_stalled_task_trace(struct task_struct *t, bool *firstreport)
 {
 	int cpu;
+	struct trc_stall_chk_rdr trc_rdr;
+	bool is_idle_tsk = is_idle_task(t);
 
 	if (*firstreport) {
 		pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n");
 		*firstreport = false;
 	}
-	// FIXME: This should attempt to use try_invoke_on_nonrunning_task().
 	cpu = task_cpu(t);
-	pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
-		 t->pid,
-		 ".I"[READ_ONCE(t->trc_ipi_to_cpu) >= 0],
-		 ".i"[is_idle_task(t)],
-		 ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
-		 READ_ONCE(t->trc_reader_nesting),
-		 " N"[!!READ_ONCE(t->trc_reader_special.b.need_qs)],
-		 cpu);
+	if (!task_call_func(t, trc_check_slow_task, &trc_rdr))
+		pr_alert("P%d: %c\n",
+			 t->pid,
+			 ".i"[is_idle_tsk]);
+	else
+		pr_alert("P%d: %c%c%c nesting: %d%c cpu: %d\n",
+			 t->pid,
+			 ".I"[trc_rdr.ipi_to_cpu >= 0],
+			 ".i"[is_idle_tsk],
+			 ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)],
+			 trc_rdr.nesting,
+			 " N"[!!trc_rdr.needqs],
			 cpu);
 	sched_show_task(t);
 }
 
@@ -1121,7 +1432,8 @@ static void check_all_holdout_tasks_trace(struct list_head *hop,
 			trc_wait_for_one_reader(t, hop);
 
 		// If check succeeded, remove this task from the list.
-		if (READ_ONCE(t->trc_reader_checked))
+		if (smp_load_acquire(&t->trc_ipi_to_cpu) == -1 &&
+		    READ_ONCE(t->trc_reader_checked))
 			trc_del_holdout(t);
 		else if (needreport)
 			show_stalled_task_trace(t, firstreport);
@@ -1156,7 +1468,7 @@ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
 	// Yes, this assumes that CPUs process IPIs in order.  If that ever
 	// changes, there will need to be a recheck and/or timed wait.
 	for_each_online_cpu(cpu)
-		if (smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu)))
+		if (WARN_ON_ONCE(smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu))))
 			smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL, 1);
 
 	// Remove the safety count.
@@ -1256,13 +1568,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_trace);
  */
 void rcu_barrier_tasks_trace(void)
 {
-	/* There is only one callback queue, so this is easy.  ;-) */
-	synchronize_rcu_tasks_trace();
+	rcu_barrier_tasks_generic(&rcu_tasks_trace);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
 
 static int __init rcu_spawn_tasks_trace_kthread(void)
 {
+	cblist_init_generic(&rcu_tasks_trace);
 	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) {
 		rcu_tasks_trace.gp_sleep = HZ / 10;
 		rcu_tasks_trace.init_fract = HZ / 10;
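
A few illustrative notes follow; none of them are part of the diff above. First, the enqueue path in call_rcu_tasks_generic() picks a per-CPU queue by shifting the CPU number right by ->percpu_enqueue_shift, which cblist_init_generic() derives as ilog2(nr_cpu_ids / lim). The userspace sketch below (made-up CPU and queue counts, and a local ilog2_approx() standing in for the kernel's ilog2()) just shows how that shift groups CPUs onto queues.

#include <stdio.h>

/* Userspace stand-in for the kernel's ilog2(): floor(log2(v)). */
static int ilog2_approx(unsigned int v)
{
	int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned int nr_cpu_ids = 16;	/* assumed CPU count */
	unsigned int lim = 4;		/* assumed queues in use (percpu_enqueue_lim) */
	int shift = ilog2_approx(nr_cpu_ids / lim);
	unsigned int cpu;

	/* Each CPU maps to queue (cpu >> shift), so CPUs share queues in
	 * contiguous groups whenever fewer queues than CPUs are in use. */
	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
		printf("cpu %2u -> queue %u\n", cpu, cpu >> shift);
	return 0;
}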
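Second, the number of queues in use adjusts at runtime: enough raw_spin_trylock_rcu_node() failures within one jiffy (more than rcu_task_contend_lim) switch enqueueing to one queue per CPU, and a small backlog (at most rcu_task_collapse_lim callbacks, all on CPU 0) collapses back to a single queue in two stages, enqueue side first and dequeue side only after an RCU grace period. The toy model below (hypothetical struct policy and note_*() helpers; thresholds taken from the module parameters' defaults) sketches that policy outside the kernel.

#include <stdbool.h>
#include <stdio.h>

#define CONTEND_LIM	100	/* rcu_task_contend_lim default */
#define COLLAPSE_LIM	10	/* rcu_task_collapse_lim default */

struct policy {
	int nqueues_enqueue;	/* models percpu_enqueue_lim */
	int nqueues_dequeue;	/* models percpu_dequeue_lim */
	int nr_cpus;
};

/* Heavy contention within a single jiffy grows both limits to one queue per CPU. */
static void note_contention(struct policy *p, int trylock_failures_this_jiffy)
{
	if (trylock_failures_this_jiffy > CONTEND_LIM)
		p->nqueues_enqueue = p->nqueues_dequeue = p->nr_cpus;
}

/* A small backlog shrinks the enqueue side first, the dequeue side after a GP. */
static void note_backlog(struct policy *p, long ncbs_total, long ncbs_off_cpu0,
			 bool rcu_gp_elapsed)
{
	if (ncbs_total <= COLLAPSE_LIM)
		p->nqueues_enqueue = 1;					/* stage 1 */
	if (!ncbs_off_cpu0 && rcu_gp_elapsed &&
	    p->nqueues_enqueue < p->nqueues_dequeue)
		p->nqueues_dequeue = 1;					/* stage 2 */
}

int main(void)
{
	struct policy p = { .nqueues_enqueue = 1, .nqueues_dequeue = 1, .nr_cpus = 8 };

	note_contention(&p, 150);	/* heavy call_rcu_tasks() contention */
	printf("after contention: enqueue=%d dequeue=%d\n", p.nqueues_enqueue, p.nqueues_dequeue);
	note_backlog(&p, 5, 0, false);	/* backlog drained, no grace period yet */
	printf("collapse stage 1: enqueue=%d dequeue=%d\n", p.nqueues_enqueue, p.nqueues_dequeue);
	note_backlog(&p, 5, 0, true);	/* grace period has elapsed */
	printf("collapse stage 2: enqueue=%d dequeue=%d\n", p.nqueues_enqueue, p.nqueues_dequeue);
	return 0;
}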
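Third, rcu_tasks_need_gpcb() returns a two-bit value that drives the kthread loop: bit 0x1 says some queue holds callbacks, and bit 0x2 says some of those callbacks are still waiting for a grace period. The trivial table below (not kernel code) just enumerates the combinations.

#include <stdio.h>

int main(void)
{
	int needgpcb;

	/* Spell out how rcu_tasks_kthread() reads the return value. */
	for (needgpcb = 0; needgpcb <= 3; needgpcb++)
		printf("needgpcb=0x%x: callbacks queued: %s, grace period needed: %s\n",
		       needgpcb,
		       (needgpcb & 0x1) ? "yes" : "no",
		       (needgpcb & 0x2) ? "yes" : "no");
	return 0;
}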
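Fourth, rcu_tasks_invoke_cbs() spreads callback invocation across the in-use queues as a binary tree: the handler for queue cpu schedules the handlers for queues 2*cpu+1 and 2*cpu+2 before draining its own list, so the work fans out from queue 0 in O(log n) waves. The sketch below (hypothetical fan_out() helper, arbitrary queue count) prints that fan-out.

#include <stdio.h>

/* Recursively print the binary fan-out rooted at "cpu"; depth is the wave
 * in which that queue's handler would be kicked off. */
static void fan_out(int cpu, int nqueues, int depth)
{
	int next = cpu * 2 + 1;

	printf("%*squeue %d (wave %d)\n", depth * 2, "", cpu, depth);
	if (next < nqueues)
		fan_out(next, nqueues, depth + 1);
	next++;
	if (next < nqueues)
		fan_out(next, nqueues, depth + 1);
}

int main(void)
{
	fan_out(0, 8, 0);	/* e.g. percpu_dequeue_lim == 8 */
	return 0;
}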
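Finally, rcu_barrier_tasks_generic() starts ->barrier_q_count at 2 so the entraining loop can never see it hit zero prematurely; each entrained callback adds one and drops one when it runs, and the initiator subtracts its two only after visiting every queue. The standalone model below (single-threaded, so the "callbacks run" step is just a loop, and the queue count is an example value) mimics that counting.

#include <stdio.h>

static int barrier_q_count;
static int completed;

/* Models rcu_barrier_tasks_generic_cb(): last callback completes the barrier. */
static void barrier_cb(void)
{
	if (--barrier_q_count == 0)
		completed = 1;
}

int main(void)
{
	int nqueues = 3, i;

	barrier_q_count = 2;			/* bias keeps the count nonzero during setup */
	for (i = 0; i < nqueues; i++)
		barrier_q_count++;		/* one callback entrained per in-use queue */
	barrier_q_count -= 2;			/* initiator drops its bias after the loop */
	if (barrier_q_count == 0)
		completed = 1;			/* all queues were empty */
	for (i = 0; i < nqueues; i++)
		barrier_cb();			/* callbacks run after a grace period */
	printf("barrier complete: %s\n", completed ? "yes" : "no");
	return 0;
}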