diff options
Diffstat (limited to 'kernel/time')
| -rw-r--r-- | kernel/time/clocksource.c | 52 | ||||
| -rw-r--r-- | kernel/time/tick-sched.c | 7 | ||||
| -rw-r--r-- | kernel/time/timekeeping.c | 3 | ||||
| -rw-r--r-- | kernel/time/timer.c | 16 |
4 files changed, 59 insertions, 19 deletions
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index b8a14d2fb5ba..b7e52a642948 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -107,7 +107,7 @@ static u64 suspend_start; * This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as * a lower bound for cs->uncertainty_margin values when registering clocks. */ -#define WATCHDOG_MAX_SKEW (50 * NSEC_PER_USEC) +#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC) #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static void clocksource_watchdog_work(struct work_struct *work); @@ -199,23 +199,30 @@ void clocksource_mark_unstable(struct clocksource *cs) spin_unlock_irqrestore(&watchdog_lock, flags); } -ulong max_cswd_read_retries = 3; +ulong max_cswd_read_retries = 2; module_param(max_cswd_read_retries, ulong, 0644); EXPORT_SYMBOL_GPL(max_cswd_read_retries); static int verify_n_cpus = 8; module_param(verify_n_cpus, int, 0644); -static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow) +enum wd_read_status { + WD_READ_SUCCESS, + WD_READ_UNSTABLE, + WD_READ_SKIP +}; + +static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow) { unsigned int nretries; - u64 wd_end, wd_delta; - int64_t wd_delay; + u64 wd_end, wd_end2, wd_delta; + int64_t wd_delay, wd_seq_delay; for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) { local_irq_disable(); *wdnow = watchdog->read(watchdog); *csnow = cs->read(cs); wd_end = watchdog->read(watchdog); + wd_end2 = watchdog->read(watchdog); local_irq_enable(); wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask); @@ -226,13 +233,34 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow) pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n", smp_processor_id(), watchdog->name, nretries); } - return true; + return WD_READ_SUCCESS; } + + /* + * Now compute delay in consecutive watchdog read to see if + * there is too much external interferences that cause + * significant delay in reading both clocksource and watchdog. + * + * If consecutive WD read-back delay > WATCHDOG_MAX_SKEW/2, + * report system busy, reinit the watchdog and skip the current + * watchdog test. + */ + wd_delta = clocksource_delta(wd_end2, wd_end, watchdog->mask); + wd_seq_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift); + if (wd_seq_delay > WATCHDOG_MAX_SKEW/2) + goto skip_test; } pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n", smp_processor_id(), watchdog->name, wd_delay, nretries); - return false; + return WD_READ_UNSTABLE; + +skip_test: + pr_info("timekeeping watchdog on CPU%d: %s wd-wd read-back delay of %lldns\n", + smp_processor_id(), watchdog->name, wd_seq_delay); + pr_info("wd-%s-wd read-back delay of %lldns, clock-skew test skipped!\n", + cs->name, wd_delay); + return WD_READ_SKIP; } static u64 csnow_mid; @@ -356,6 +384,7 @@ static void clocksource_watchdog(struct timer_list *unused) int next_cpu, reset_pending; int64_t wd_nsec, cs_nsec; struct clocksource *cs; + enum wd_read_status read_ret; u32 md; spin_lock(&watchdog_lock); @@ -373,9 +402,12 @@ static void clocksource_watchdog(struct timer_list *unused) continue; } - if (!cs_watchdog_read(cs, &csnow, &wdnow)) { - /* Clock readout unreliable, so give it up. */ - __clocksource_unstable(cs); + read_ret = cs_watchdog_read(cs, &csnow, &wdnow); + + if (read_ret != WD_READ_SUCCESS) { + if (read_ret == WD_READ_UNSTABLE) + /* Clock readout unreliable, so give it up. */ + __clocksource_unstable(cs); continue; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6bffe5af8cb1..17a283ce2b20 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -1375,6 +1375,13 @@ static inline void tick_nohz_irq_enter(void) now = ktime_get(); if (ts->idle_active) tick_nohz_stop_idle(ts, now); + /* + * If all CPUs are idle. We may need to update a stale jiffies value. + * Note nohz_full is a special case: a timekeeper is guaranteed to stay + * alive but it might be busy looping with interrupts disabled in some + * rare case (typically stop machine). So we must make sure we have a + * last resort. + */ if (ts->tick_stopped) tick_nohz_update_jiffies(now); } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b348749a9fc6..dcdcb85121e4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1306,8 +1306,7 @@ int do_settimeofday64(const struct timespec64 *ts) timekeeping_forward_now(tk); xt = tk_xtime(tk); - ts_delta.tv_sec = ts->tv_sec - xt.tv_sec; - ts_delta.tv_nsec = ts->tv_nsec - xt.tv_nsec; + ts_delta = timespec64_sub(*ts, xt); if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) { ret = -EINVAL; diff --git a/kernel/time/timer.c b/kernel/time/timer.c index e3d2c23c413d..85f1021ad459 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -2054,26 +2054,28 @@ unsigned long msleep_interruptible(unsigned int msecs) EXPORT_SYMBOL(msleep_interruptible); /** - * usleep_range - Sleep for an approximate time - * @min: Minimum time in usecs to sleep - * @max: Maximum time in usecs to sleep + * usleep_range_state - Sleep for an approximate time in a given state + * @min: Minimum time in usecs to sleep + * @max: Maximum time in usecs to sleep + * @state: State of the current task that will be while sleeping * * In non-atomic context where the exact wakeup time is flexible, use - * usleep_range() instead of udelay(). The sleep improves responsiveness + * usleep_range_state() instead of udelay(). The sleep improves responsiveness * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces * power usage by allowing hrtimers to take advantage of an already- * scheduled interrupt instead of scheduling a new one just for this sleep. */ -void __sched usleep_range(unsigned long min, unsigned long max) +void __sched usleep_range_state(unsigned long min, unsigned long max, + unsigned int state) { ktime_t exp = ktime_add_us(ktime_get(), min); u64 delta = (u64)(max - min) * NSEC_PER_USEC; for (;;) { - __set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(state); /* Do not return before the requested sleep time has elapsed */ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) break; } } -EXPORT_SYMBOL(usleep_range); +EXPORT_SYMBOL(usleep_range_state); |