Diffstat (limited to 'kernel/time/sched_clock.c')
-rw-r--r--  kernel/time/sched_clock.c | 236
1 file changed, 161 insertions(+), 75 deletions(-)
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 01d2d15aa662..a26036d37a38 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -1,5 +1,6 @@
 /*
- * sched_clock.c: support for extending counters to full 64-bit ns counter
+ * sched_clock.c: Generic sched_clock() support, to extend low level
+ *                hardware time counters to full 64-bit ns values.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -18,15 +19,53 @@
 #include <linux/seqlock.h>
 #include <linux/bitops.h>
 
-struct clock_data {
-	ktime_t wrap_kt;
+/**
+ * struct clock_read_data - data required to read from sched_clock()
+ *
+ * @epoch_ns:		sched_clock() value at last update
+ * @epoch_cyc:		Clock cycle value at last update.
+ * @sched_clock_mask:   Bitmask for two's complement subtraction of non 64bit
+ *			clocks.
+ * @read_sched_clock:	Current clock source (or dummy source when suspended).
+ * @mult:		Multiplier for scaled math conversion.
+ * @shift:		Shift value for scaled math conversion.
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=40 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
 	u64 epoch_ns;
 	u64 epoch_cyc;
-	seqcount_t seq;
-	unsigned long rate;
+	u64 sched_clock_mask;
+	u64 (*read_sched_clock)(void);
 	u32 mult;
 	u32 shift;
-	bool suspended;
+};
+
+/**
+ * struct clock_data - all data needed for sched_clock() (including
+ *                     registration of a new clock source)
+ *
+ * @seq:		Sequence counter for protecting updates. The lowest
+ *			bit is the index for @read_data.
+ * @read_data:		Data required to read from sched_clock.
+ * @wrap_kt:		Duration for which clock can run before wrapping.
+ * @rate:		Tick rate of the registered clock.
+ * @actual_read_sched_clock: Registered hardware level clock read function.
+ *
+ * The ordering of this structure has been chosen to optimize cache
+ * performance. In particular 'seq' and 'read_data[0]' (combined) should fit
+ * into a single 64-byte cache line.
+ */
+struct clock_data {
+	seqcount_t		seq;
+	struct clock_read_data	read_data[2];
+	ktime_t			wrap_kt;
+	unsigned long		rate;
+
+	u64 (*actual_read_sched_clock)(void);
 };
 
 static struct hrtimer sched_clock_timer;
@@ -34,12 +73,6 @@ static int irqtime = -1;
 
 core_param(irqtime, irqtime, int, 0400);
 
-static struct clock_data cd = {
-	.mult	= NSEC_PER_SEC / HZ,
-};
-
-static u64 __read_mostly sched_clock_mask;
-
 static u64 notrace jiffy_sched_clock_read(void)
 {
 	/*
@@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void)
 	return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static struct clock_data cd ____cacheline_aligned = {
+	.read_data[0] = { .mult = NSEC_PER_SEC / HZ,
+			  .read_sched_clock = jiffy_sched_clock_read, },
+	.actual_read_sched_clock = jiffy_sched_clock_read,
+};
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 {
@@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 
 unsigned long long notrace sched_clock(void)
 {
-	u64 epoch_ns;
-	u64 epoch_cyc;
-	u64 cyc;
+	u64 cyc, res;
 	unsigned long seq;
-
-	if (cd.suspended)
-		return cd.epoch_ns;
+	struct clock_read_data *rd;
 
 	do {
-		seq = raw_read_seqcount_begin(&cd.seq);
-		epoch_cyc = cd.epoch_cyc;
-		epoch_ns = cd.epoch_ns;
+		seq = raw_read_seqcount(&cd.seq);
+		rd = cd.read_data + (seq & 1);
+
+		cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
+		      rd->sched_clock_mask;
+		res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
 	} while (read_seqcount_retry(&cd.seq, seq));
 
-	cyc = read_sched_clock();
-	cyc = (cyc - epoch_cyc) & sched_clock_mask;
-	return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
+	return res;
+}
+
+/*
+ * Updating the data required to read the clock.
+ *
+ * sched_clock() will never observe mis-matched data even if called from
+ * an NMI. We do this by maintaining an odd/even copy of the data and
+ * steering sched_clock() to one or the other using a sequence counter.
+ * In order to preserve the data cache profile of sched_clock() as much
+ * as possible the system reverts back to the even copy when the update
+ * completes; the odd copy is used *only* during an update.
+ */
+static void update_clock_read_data(struct clock_read_data *rd)
+{
+	/* update the backup (odd) copy with the new data */
+	cd.read_data[1] = *rd;
+
+	/* steer readers towards the odd copy */
+	raw_write_seqcount_latch(&cd.seq);
+
+	/* now it's safe for us to update the normal (even) copy */
+	cd.read_data[0] = *rd;
+
+	/* switch readers back to the even copy */
+	raw_write_seqcount_latch(&cd.seq);
 }
 
 /*
- * Atomically update the sched_clock epoch.
+ * Atomically update the sched_clock() epoch.
  */
-static void notrace update_sched_clock(void)
+static void update_sched_clock(void)
 {
-	unsigned long flags;
 	u64 cyc;
 	u64 ns;
+	struct clock_read_data rd;
+
+	rd = cd.read_data[0];
+
+	cyc = cd.actual_read_sched_clock();
+	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+
+	rd.epoch_ns = ns;
+	rd.epoch_cyc = cyc;
 
-	cyc = read_sched_clock();
-	ns = cd.epoch_ns +
-		cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
-
-	raw_local_irq_save(flags);
-	raw_write_seqcount_begin(&cd.seq);
-	cd.epoch_ns = ns;
-	cd.epoch_cyc = cyc;
-	raw_write_seqcount_end(&cd.seq);
-	raw_local_irq_restore(flags);
+	update_clock_read_data(&rd);
 }
 
 static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
 {
	update_sched_clock();
 	hrtimer_forward_now(hrt, cd.wrap_kt);
+
 	return HRTIMER_RESTART;
 }
 
-void __init sched_clock_register(u64 (*read)(void), int bits,
-				 unsigned long rate)
+void __init
+sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 {
 	u64 res, wrap, new_mask, new_epoch, cyc, ns;
 	u32 new_mult, new_shift;
-	ktime_t new_wrap_kt;
 	unsigned long r;
 	char r_unit;
+	struct clock_read_data rd;
 
 	if (cd.rate > rate)
 		return;
 
 	WARN_ON(!irqs_disabled());
 
-	/* calculate the mult/shift to convert counter ticks to ns. */
+	/* Calculate the mult/shift to convert counter ticks to ns. */
 	clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
 
 	new_mask = CLOCKSOURCE_MASK(bits);
+	cd.rate = rate;
+
+	/* Calculate how many nanosecs until we risk wrapping */
+	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
+	cd.wrap_kt = ns_to_ktime(wrap);
 
-	/* calculate how many ns until we wrap */
-	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
-	new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
+	rd = cd.read_data[0];
 
-	/* update epoch for new counter and update epoch_ns from old counter*/
+	/* Update epoch for new counter and update 'epoch_ns' from old counter */
 	new_epoch = read();
-	cyc = read_sched_clock();
-	ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-			  cd.mult, cd.shift);
+	cyc = cd.actual_read_sched_clock();
+	ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+	cd.actual_read_sched_clock = read;
 
-	raw_write_seqcount_begin(&cd.seq);
-	read_sched_clock = read;
-	sched_clock_mask = new_mask;
-	cd.rate = rate;
-	cd.wrap_kt = new_wrap_kt;
-	cd.mult = new_mult;
-	cd.shift = new_shift;
-	cd.epoch_cyc = new_epoch;
-	cd.epoch_ns = ns;
-	raw_write_seqcount_end(&cd.seq);
+	rd.read_sched_clock	= read;
+	rd.sched_clock_mask	= new_mask;
+	rd.mult			= new_mult;
+	rd.shift		= new_shift;
+	rd.epoch_cyc		= new_epoch;
+	rd.epoch_ns		= ns;
+
+	update_clock_read_data(&rd);
 
 	r = rate;
 	if (r >= 4000000) {
 		r /= 1000000;
 		r_unit = 'M';
-	} else if (r >= 1000) {
-		r /= 1000;
-		r_unit = 'k';
-	} else
-		r_unit = ' ';
-
-	/* calculate the ns resolution of this counter */
+	} else {
+		if (r >= 1000) {
+			r /= 1000;
+			r_unit = 'k';
+		} else {
+			r_unit = ' ';
+		}
+	}
+
+	/* Calculate the ns resolution of this counter */
 	res = cyc_to_ns(1ULL, new_mult, new_shift);
 
 	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
 		bits, r, r_unit, res, wrap);
 
-	/* Enable IRQ time accounting if we have a fast enough sched_clock */
+	/* Enable IRQ time accounting if we have a fast enough sched_clock() */
 	if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
 		enable_sched_clock_irqtime();
 
@@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 void __init sched_clock_postinit(void)
 {
 	/*
-	 * If no sched_clock function has been provided at that point,
+	 * If no sched_clock() function has been provided at that point,
 	 * make it the final one.
 	 */
-	if (read_sched_clock == jiffy_sched_clock_read)
+	if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
 		sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
 
 	update_sched_clock();
@@ -189,29 +251,53 @@ void __init sched_clock_postinit(void)
 	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
 }
 
+/*
+ * Clock read function for use when the clock is suspended.
+ *
+ * This function makes it appear to sched_clock() as if the clock
+ * stopped counting at its last update.
+ *
+ * This function must only be called from the critical
+ * section in sched_clock(). It relies on the read_seqcount_retry()
+ * at the end of the critical section to be sure we observe the
+ * correct copy of 'epoch_cyc'.
+ */
+static u64 notrace suspended_sched_clock_read(void)
+{
+	unsigned long seq = raw_read_seqcount(&cd.seq);
+
+	return cd.read_data[seq & 1].epoch_cyc;
+}
+
 static int sched_clock_suspend(void)
 {
+	struct clock_read_data *rd = &cd.read_data[0];
+
 	update_sched_clock();
 	hrtimer_cancel(&sched_clock_timer);
-	cd.suspended = true;
+	rd->read_sched_clock = suspended_sched_clock_read;
+
 	return 0;
 }
 
 static void sched_clock_resume(void)
 {
-	cd.epoch_cyc = read_sched_clock();
+	struct clock_read_data *rd = &cd.read_data[0];
+
+	rd->epoch_cyc = cd.actual_read_sched_clock();
 	hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
-	cd.suspended = false;
+	rd->read_sched_clock = cd.actual_read_sched_clock;
 }
 
 static struct syscore_ops sched_clock_ops = {
-	.suspend = sched_clock_suspend,
-	.resume = sched_clock_resume,
+	.suspend	= sched_clock_suspend,
+	.resume		= sched_clock_resume,
 };
 
 static int __init sched_clock_syscore_init(void)
 {
 	register_syscore_ops(&sched_clock_ops);
+
 	return 0;
 }
 device_initcall(sched_clock_syscore_init);
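
The heart of the change is the seqcount-latch update in update_clock_read_data(): the updater first fills the odd copy, bumps the sequence count so readers are steered to it, then rewrites the even copy and bumps the count again. A reader, even one running in an NMI that interrupted the update half way, therefore always lands on a complete copy, and the retry loop in sched_clock() catches the rare race on the counter itself. The stand-alone C sketch below illustrates the same odd/even latch pattern with C11 atomics; it is only an illustration under those assumptions: the names latch_clock, latch_update() and latch_read() are invented here, the plain struct copies stand in for accesses the kernel mediates through its seqcount API, and the explicit atomics replace raw_write_seqcount_latch()/raw_read_seqcount().

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct clock_snap {			/* stand-in for struct clock_read_data */
	uint64_t epoch_ns;
	uint64_t epoch_cyc;
};

struct latch_clock {
	atomic_uint seq;		/* lowest bit selects data[0] or data[1] */
	struct clock_snap data[2];	/* even (normal) and odd (update) copies */
};

/* Writer side: same shape as update_clock_read_data(). */
static void latch_update(struct latch_clock *c, const struct clock_snap *snap)
{
	c->data[1] = *snap;		/* fill the odd (backup) copy */
	/* acq_rel keeps the store above before, and the store below after, the bump */
	atomic_fetch_add_explicit(&c->seq, 1, memory_order_acq_rel);
	c->data[0] = *snap;		/* now update the even (normal) copy */
	atomic_fetch_add_explicit(&c->seq, 1, memory_order_acq_rel);
}

/* Reader side: same shape as the retry loop in sched_clock(). */
static struct clock_snap latch_read(struct latch_clock *c)
{
	struct clock_snap snap;
	unsigned int seq;

	do {
		seq = atomic_load_explicit(&c->seq, memory_order_acquire);
		snap = c->data[seq & 1];
		/* order the data reads before re-checking the sequence count */
		atomic_thread_fence(memory_order_acquire);
	} while (atomic_load_explicit(&c->seq, memory_order_relaxed) != seq);

	return snap;
}

int main(void)
{
	struct latch_clock c = { .seq = 0 };
	struct clock_snap s = { .epoch_ns = 1000, .epoch_cyc = 42 };

	latch_update(&c, &s);
	s = latch_read(&c);
	printf("epoch_ns=%llu epoch_cyc=%llu\n",
	       (unsigned long long)s.epoch_ns, (unsigned long long)s.epoch_cyc);
	return 0;
}

Reverting to the even copy at the end of every update is what lets the hot path in sched_clock() keep hitting the same cache line almost all of the time; the odd copy is only touched during the brief update window, which is the point of the read_data[2] layout described in the struct clock_data comment.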