diff options
-rw-r--r-- | kernel/time/tick-internal.h | 1 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 40 | ||||
-rw-r--r-- | kernel/time/timer.c | 60 |
3 files changed, 71 insertions, 30 deletions
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 481b7ab65e2c..47df30b871e4 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -163,6 +163,7 @@ static inline void timers_update_nohz(void) { } DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases); extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem); +u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle); void timer_clear_idle(void); #define CLOCK_SET_WALL \ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 7c9efe3d9d56..344b904f520f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -860,11 +860,6 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu) delta = next_tick - basemono; if (delta <= (u64)TICK_NSEC) { /* - * Tell the timer code that the base is not idle, i.e. undo - * the effect of get_next_timer_interrupt(): - */ - timer_clear_idle(); - /* * We've not stopped the tick yet, and there's a timer in the * next period, so no point in stopping it either, bail. */ @@ -899,13 +894,39 @@ out: static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); + unsigned long basejiff = ts->last_jiffies; u64 basemono = ts->timer_expires_base; - u64 expires = ts->timer_expires; + bool timer_idle; + u64 expires; /* Make sure we won't be trying to stop it twice in a row. */ ts->timer_expires_base = 0; /* + * Now the tick should be stopped definitely - so the timer base needs + * to be marked idle as well to not miss a newly queued timer. + */ + expires = timer_base_try_to_set_idle(basejiff, basemono, &timer_idle); + if (expires > ts->timer_expires) { + /* + * This path could only happen when the first timer was removed + * between calculating the possible sleep length and now (when + * high resolution mode is not active, timer could also be a + * hrtimer). + * + * We have to stick to the original calculated expiry value to + * not stop the tick for too long with a shallow C-state (which + * was programmed by cpuidle because of an early next expiration + * value). + */ + expires = ts->timer_expires; + } + + /* If the timer base is not idle, retain the not yet stopped tick. */ + if (!timer_idle) + return; + + /* * If this CPU is the one which updates jiffies, then give up * the assignment and let it be taken by the CPU which runs * the tick timer next, which might be this CPU as well. If we @@ -1001,7 +1022,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) touch_softlockup_watchdog_sched(); /* Cancel the scheduled timer and restore the tick: */ - ts->tick_stopped = 0; + ts->tick_stopped = 0; tick_nohz_restart(ts, now); } @@ -1157,11 +1178,6 @@ void tick_nohz_idle_stop_tick(void) void tick_nohz_idle_retain_tick(void) { tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched)); - /* - * Undo the effect of get_next_timer_interrupt() called from - * tick_nohz_next_event(). - */ - timer_clear_idle(); } /** diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 9f0cdba4afe0..a4b8a58d05e5 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1956,19 +1956,22 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC; } -static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem) +static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem, + bool *idle) { struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); unsigned long nextevt = basej + NEXT_TIMER_MAX_DELTA; u64 expires = KTIME_MAX; - bool was_idle; /* * Pretend that there is no timer pending if the cpu is offline. * Possible pending timers will be migrated later to an active cpu. */ - if (cpu_is_offline(smp_processor_id())) + if (cpu_is_offline(smp_processor_id())) { + if (idle) + *idle = true; return expires; + } raw_spin_lock(&base->lock); if (base->next_expiry_recalc) @@ -1998,17 +2001,26 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem) __forward_timer_base(base, basej); /* - * Base is idle if the next event is more than a tick away. - * - * If the base is marked idle then any timer add operation must forward - * the base clk itself to keep granularity small. This idle logic is - * only maintained for the BASE_STD base, deferrable timers may still - * see large granularity skew (by design). + * Set base->is_idle only when caller is timer_base_try_to_set_idle() */ - was_idle = base->is_idle; - base->is_idle = time_after(nextevt, basej + 1); - if (was_idle != base->is_idle) - trace_timer_base_idle(base->is_idle, base->cpu); + if (idle) { + /* + * Base is idle if the next event is more than a tick away. + * + * If the base is marked idle then any timer add operation must + * forward the base clk itself to keep granularity small. This + * idle logic is only maintained for the BASE_STD base, + * deferrable timers may still see large granularity skew (by + * design). + */ + if (!base->is_idle) { + if (time_after(nextevt, basej + 1)) { + base->is_idle = true; + trace_timer_base_idle(true, base->cpu); + } + } + *idle = base->is_idle; + } raw_spin_unlock(&base->lock); @@ -2025,7 +2037,21 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem) */ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) { - return __get_next_timer_interrupt(basej, basem); + return __get_next_timer_interrupt(basej, basem, NULL); +} + +/** + * timer_base_try_to_set_idle() - Try to set the idle state of the timer bases + * @basej: base time jiffies + * @basem: base time clock monotonic + * @idle: pointer to store the value of timer_base->is_idle + * + * Returns the tick aligned clock monotonic time of the next pending + * timer or KTIME_MAX if no timer is pending. + */ +u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle) +{ + return __get_next_timer_interrupt(basej, basem, idle); } /** @@ -2043,10 +2069,8 @@ void timer_clear_idle(void) * sending the IPI a few instructions smaller for the cost of taking * the lock in the exit from idle path. */ - if (base->is_idle) { - base->is_idle = false; - trace_timer_base_idle(false, smp_processor_id()); - } + base->is_idle = false; + trace_timer_base_idle(false, smp_processor_id()); } #endif |