aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--kernel/time/tick-internal.h1
-rw-r--r--kernel/time/tick-sched.c40
-rw-r--r--kernel/time/timer.c60
3 files changed, 71 insertions, 30 deletions
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 481b7ab65e2c..47df30b871e4 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -163,6 +163,7 @@ static inline void timers_update_nohz(void) { }
DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
+u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle);
void timer_clear_idle(void);
#define CLOCK_SET_WALL \
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 7c9efe3d9d56..344b904f520f 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -860,11 +860,6 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
delta = next_tick - basemono;
if (delta <= (u64)TICK_NSEC) {
/*
- * Tell the timer code that the base is not idle, i.e. undo
- * the effect of get_next_timer_interrupt():
- */
- timer_clear_idle();
- /*
* We've not stopped the tick yet, and there's a timer in the
* next period, so no point in stopping it either, bail.
*/
@@ -899,13 +894,39 @@ out:
static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
+ unsigned long basejiff = ts->last_jiffies;
u64 basemono = ts->timer_expires_base;
- u64 expires = ts->timer_expires;
+ bool timer_idle;
+ u64 expires;
/* Make sure we won't be trying to stop it twice in a row. */
ts->timer_expires_base = 0;
/*
+ * Now the tick should be stopped definitely - so the timer base needs
+ * to be marked idle as well to not miss a newly queued timer.
+ */
+ expires = timer_base_try_to_set_idle(basejiff, basemono, &timer_idle);
+ if (expires > ts->timer_expires) {
+ /*
+ * This path could only happen when the first timer was removed
+ * between calculating the possible sleep length and now (when
+ * high resolution mode is not active, timer could also be a
+ * hrtimer).
+ *
+ * We have to stick to the original calculated expiry value to
+ * not stop the tick for too long with a shallow C-state (which
+ * was programmed by cpuidle because of an early next expiration
+ * value).
+ */
+ expires = ts->timer_expires;
+ }
+
+ /* If the timer base is not idle, retain the not yet stopped tick. */
+ if (!timer_idle)
+ return;
+
+ /*
* If this CPU is the one which updates jiffies, then give up
* the assignment and let it be taken by the CPU which runs
* the tick timer next, which might be this CPU as well. If we
@@ -1001,7 +1022,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
touch_softlockup_watchdog_sched();
/* Cancel the scheduled timer and restore the tick: */
- ts->tick_stopped = 0;
+ ts->tick_stopped = 0;
tick_nohz_restart(ts, now);
}
@@ -1157,11 +1178,6 @@ void tick_nohz_idle_stop_tick(void)
void tick_nohz_idle_retain_tick(void)
{
tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
- /*
- * Undo the effect of get_next_timer_interrupt() called from
- * tick_nohz_next_event().
- */
- timer_clear_idle();
}
/**
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 9f0cdba4afe0..a4b8a58d05e5 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1956,19 +1956,22 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
}
-static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem)
+static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
+ bool *idle)
{
struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
unsigned long nextevt = basej + NEXT_TIMER_MAX_DELTA;
u64 expires = KTIME_MAX;
- bool was_idle;
/*
* Pretend that there is no timer pending if the cpu is offline.
* Possible pending timers will be migrated later to an active cpu.
*/
- if (cpu_is_offline(smp_processor_id()))
+ if (cpu_is_offline(smp_processor_id())) {
+ if (idle)
+ *idle = true;
return expires;
+ }
raw_spin_lock(&base->lock);
if (base->next_expiry_recalc)
@@ -1998,17 +2001,26 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem)
__forward_timer_base(base, basej);
/*
- * Base is idle if the next event is more than a tick away.
- *
- * If the base is marked idle then any timer add operation must forward
- * the base clk itself to keep granularity small. This idle logic is
- * only maintained for the BASE_STD base, deferrable timers may still
- * see large granularity skew (by design).
+ * Set base->is_idle only when caller is timer_base_try_to_set_idle()
*/
- was_idle = base->is_idle;
- base->is_idle = time_after(nextevt, basej + 1);
- if (was_idle != base->is_idle)
- trace_timer_base_idle(base->is_idle, base->cpu);
+ if (idle) {
+ /*
+ * Base is idle if the next event is more than a tick away.
+ *
+ * If the base is marked idle then any timer add operation must
+ * forward the base clk itself to keep granularity small. This
+ * idle logic is only maintained for the BASE_STD base,
+ * deferrable timers may still see large granularity skew (by
+ * design).
+ */
+ if (!base->is_idle) {
+ if (time_after(nextevt, basej + 1)) {
+ base->is_idle = true;
+ trace_timer_base_idle(true, base->cpu);
+ }
+ }
+ *idle = base->is_idle;
+ }
raw_spin_unlock(&base->lock);
@@ -2025,7 +2037,21 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem)
*/
u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
{
- return __get_next_timer_interrupt(basej, basem);
+ return __get_next_timer_interrupt(basej, basem, NULL);
+}
+
+/**
+ * timer_base_try_to_set_idle() - Try to set the idle state of the timer bases
+ * @basej: base time jiffies
+ * @basem: base time clock monotonic
+ * @idle: pointer to store the value of timer_base->is_idle
+ *
+ * Returns the tick aligned clock monotonic time of the next pending
+ * timer or KTIME_MAX if no timer is pending.
+ */
+u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle)
+{
+ return __get_next_timer_interrupt(basej, basem, idle);
}
/**
@@ -2043,10 +2069,8 @@ void timer_clear_idle(void)
* sending the IPI a few instructions smaller for the cost of taking
* the lock in the exit from idle path.
*/
- if (base->is_idle) {
- base->is_idle = false;
- trace_timer_base_idle(false, smp_processor_id());
- }
+ base->is_idle = false;
+ trace_timer_base_idle(false, smp_processor_id());
}
#endif