Diffstat (limited to 'kernel/time')
 kernel/time/Kconfig             |  50
 kernel/time/Makefile            |   1
 kernel/time/alarmtimer.c        | 386
 kernel/time/clockevents.c       |   2
 kernel/time/clocksource.c       |   7
 kernel/time/hrtimer.c           | 181
 kernel/time/itimer.c            | 104
 kernel/time/jiffies.c           |  30
 kernel/time/posix-clock.c       | 123
 kernel/time/posix-cpu-timers.c  | 353
 kernel/time/posix-stubs.c       | 154
 kernel/time/posix-timers.c      | 796
 kernel/time/posix-timers.h      |  40
 kernel/time/sched_clock.c       |   6
 kernel/time/tick-broadcast.c    |  34
 kernel/time/tick-internal.h     |   2
 kernel/time/tick-sched.c        |  85
 kernel/time/time.c              | 186
 kernel/time/timeconst.bc        |   6
 kernel/time/timekeeping.c       | 153
 kernel/time/timekeeping.h       |   2
 kernel/time/timekeeping_debug.c |   4
 kernel/time/timer.c             | 106
 kernel/time/timer_list.c        |  18
 kernel/time/timer_stats.c       | 425
 25 files changed, 1545 insertions(+), 1709 deletions(-)
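A dominant theme in the hunks below is the conversion of the core timer code away from 32-bit struct timespec and cputime_t toward timespec64 and plain u64 nanoseconds. Several conversion sites (set_cpu_itimer, posix_cpu_timer_set) carry the same new comment: use the ktime conversion because it clamps to KTIME_MAX and so avoids multiplication overflow. Below is a minimal userspace sketch of that clamped conversion; KTIME_MAX and KTIME_SEC_MAX are redefined locally here (in the kernel they come from include/linux/ktime.h), and the test values in main() are illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NSEC_PER_SEC  1000000000LL
#define KTIME_MAX     INT64_MAX
#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)

/* Saturating timespec -> nanoseconds, modeled on the kernel's ktime_set():
 * any tv_sec at or beyond KTIME_SEC_MAX would overflow the multiply,
 * so the result is clamped to KTIME_MAX instead. */
static int64_t timespec_to_ns_clamped(const struct timespec *ts)
{
	if (ts->tv_sec >= KTIME_SEC_MAX)
		return KTIME_MAX;
	return (int64_t)ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
}

int main(void)
{
	/* assumes 64-bit time_t */
	struct timespec big  = { .tv_sec = (time_t)KTIME_SEC_MAX + 1 };
	struct timespec norm = { .tv_sec = 1, .tv_nsec = 500000000 };

	printf("big  -> %lld (clamped to KTIME_MAX)\n",
	       (long long)timespec_to_ns_clamped(&big));
	printf("norm -> %lld\n", (long long)timespec_to_ns_clamped(&norm));
	return 0;
}

Without the clamp, tv_sec * NSEC_PER_SEC is undefined behavior for large tv_sec; saturating at KTIME_MAX matches how ktime_set() and timespec64_to_ktime() behave in the kernel.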
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 4008d9f95dd7..ac09bc29eb08 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -126,56 +126,6 @@ config NO_HZ_FULL_ALL Note the boot CPU will still be kept outside the range to handle the timekeeping duty. -config NO_HZ_FULL_SYSIDLE - bool "Detect full-system idle state for full dynticks system" - depends on NO_HZ_FULL - default n - help - At least one CPU must keep the scheduling-clock tick running for - timekeeping purposes whenever there is a non-idle CPU, where - "non-idle" also includes dynticks CPUs as long as they are - running non-idle tasks. Because the underlying adaptive-tick - support cannot distinguish between all CPUs being idle and - all CPUs each running a single task in dynticks mode, the - underlying support simply ensures that there is always a CPU - handling the scheduling-clock tick, whether or not all CPUs - are idle. This Kconfig option enables scalable detection of - the all-CPUs-idle state, thus allowing the scheduling-clock - tick to be disabled when all CPUs are idle. Note that scalable - detection of the all-CPUs-idle state means that larger systems - will be slower to declare the all-CPUs-idle state. - - Say Y if you would like to help debug all-CPUs-idle detection. - - Say N if you are unsure. - -config NO_HZ_FULL_SYSIDLE_SMALL - int "Number of CPUs above which large-system approach is used" - depends on NO_HZ_FULL_SYSIDLE - range 1 NR_CPUS - default 8 - help - The full-system idle detection mechanism takes a lazy approach - on large systems, as is required to attain decent scalability. - However, on smaller systems, scalability is not anywhere near as - large a concern as is energy efficiency. The sysidle subsystem - therefore uses a fast but non-scalable algorithm for small - systems and a lazier but scalable algorithm for large systems. - This Kconfig parameter defines the number of CPUs in the largest - system that will be considered to be "small". - - The default value will be fine in most cases. Battery-powered - systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger - numbers of CPUs, and (3) are suffering from battery-lifetime - problems due to long sysidle latencies might wish to experiment - with larger values for this Kconfig parameter. On the other - hand, they might be even better served by disabling NO_HZ_FULL - entirely, given that NO_HZ_FULL is intended for HPC and - real-time workloads that at present do not tend to be run on - battery-powered systems. - - Take the default if you are unsure. 
- config NO_HZ bool "Old Idle dynticks config" depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 976840d29a71..938dbf33ef49 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -15,6 +15,5 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) endif obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o -obj-$(CONFIG_TIMER_STATS) += timer_stats.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index e6dc9a538efa..0b8ff7d257ea 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -19,12 +19,17 @@ #include <linux/hrtimer.h> #include <linux/timerqueue.h> #include <linux/rtc.h> +#include <linux/sched/signal.h> +#include <linux/sched/debug.h> #include <linux/alarmtimer.h> #include <linux/mutex.h> #include <linux/platform_device.h> #include <linux/posix-timers.h> #include <linux/workqueue.h> #include <linux/freezer.h> +#include <linux/compat.h> + +#include "posix-timers.h" #define CREATE_TRACE_POINTS #include <trace/events/alarmtimer.h> @@ -43,11 +48,13 @@ static struct alarm_base { clockid_t base_clockid; } alarm_bases[ALARM_NUMTYPE]; +#if defined(CONFIG_POSIX_TIMERS) || defined(CONFIG_RTC_CLASS) /* freezer information to handle clock_nanosleep triggered wakeups */ static enum alarmtimer_type freezer_alarmtype; static ktime_t freezer_expires; static ktime_t freezer_delta; static DEFINE_SPINLOCK(freezer_delta_lock); +#endif static struct wakeup_source *ws; @@ -305,38 +312,6 @@ static int alarmtimer_resume(struct device *dev) } #endif -static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) -{ - struct alarm_base *base; - unsigned long flags; - ktime_t delta; - - switch(type) { - case ALARM_REALTIME: - base = &alarm_bases[ALARM_REALTIME]; - type = ALARM_REALTIME_FREEZER; - break; - case ALARM_BOOTTIME: - base = &alarm_bases[ALARM_BOOTTIME]; - type = ALARM_BOOTTIME_FREEZER; - break; - default: - WARN_ONCE(1, "Invalid alarm type: %d\n", type); - return; - } - - delta = ktime_sub(absexp, base->gettime()); - - spin_lock_irqsave(&freezer_delta_lock, flags); - if (!freezer_delta || (delta < freezer_delta)) { - freezer_delta = delta; - freezer_expires = absexp; - freezer_alarmtype = type; - } - spin_unlock_irqrestore(&freezer_delta_lock, flags); -} - - /** * alarm_init - Initialize an alarm structure * @alarm: ptr to alarm to be initialized @@ -385,7 +360,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start) { struct alarm_base *base = &alarm_bases[alarm->type]; - start = ktime_add(start, base->gettime()); + start = ktime_add_safe(start, base->gettime()); alarm_start(alarm, start); } EXPORT_SYMBOL_GPL(alarm_start_relative); @@ -473,7 +448,7 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) overrun++; } - alarm->node.expires = ktime_add(alarm->node.expires, interval); + alarm->node.expires = ktime_add_safe(alarm->node.expires, interval); return overrun; } EXPORT_SYMBOL_GPL(alarm_forward); @@ -486,6 +461,38 @@ u64 alarm_forward_now(struct alarm *alarm, ktime_t interval) } EXPORT_SYMBOL_GPL(alarm_forward_now); +#ifdef CONFIG_POSIX_TIMERS + +static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) +{ + struct alarm_base *base; + unsigned long flags; + ktime_t delta; + + switch(type) { + case ALARM_REALTIME: + base = &alarm_bases[ALARM_REALTIME]; + type = ALARM_REALTIME_FREEZER; 
+ break; + case ALARM_BOOTTIME: + base = &alarm_bases[ALARM_BOOTTIME]; + type = ALARM_BOOTTIME_FREEZER; + break; + default: + WARN_ONCE(1, "Invalid alarm type: %d\n", type); + return; + } + + delta = ktime_sub(absexp, base->gettime()); + + spin_lock_irqsave(&freezer_delta_lock, flags); + if (!freezer_delta || (delta < freezer_delta)) { + freezer_delta = delta; + freezer_expires = absexp; + freezer_alarmtype = type; + } + spin_unlock_irqrestore(&freezer_delta_lock, flags); +} /** * clock2alarm - helper that converts from clockid to alarmtypes @@ -509,22 +516,26 @@ static enum alarmtimer_type clock2alarm(clockid_t clockid) static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, ktime_t now) { - unsigned long flags; struct k_itimer *ptr = container_of(alarm, struct k_itimer, - it.alarm.alarmtimer); + it.alarm.alarmtimer); enum alarmtimer_restart result = ALARMTIMER_NORESTART; + unsigned long flags; + int si_private = 0; spin_lock_irqsave(&ptr->it_lock, flags); - if ((ptr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) { - if (IS_ENABLED(CONFIG_POSIX_TIMERS) && - posix_timer_event(ptr, 0) != 0) - ptr->it_overrun++; - } - /* Re-add periodic timers */ - if (ptr->it.alarm.interval) { - ptr->it_overrun += alarm_forward(alarm, now, - ptr->it.alarm.interval); + ptr->it_active = 0; + if (ptr->it_interval) + si_private = ++ptr->it_requeue_pending; + + if (posix_timer_event(ptr, si_private) && ptr->it_interval) { + /* + * Handle ignored signals and rearm the timer. This will go + * away once we handle ignored signals proper. + */ + ptr->it_overrun += alarm_forward_now(alarm, ptr->it_interval); + ++ptr->it_requeue_pending; + ptr->it_active = 1; result = ALARMTIMER_RESTART; } spin_unlock_irqrestore(&ptr->it_lock, flags); @@ -533,13 +544,79 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, } /** + * alarm_timer_rearm - Posix timer callback for rearming timer + * @timr: Pointer to the posixtimer data struct + */ +static void alarm_timer_rearm(struct k_itimer *timr) +{ + struct alarm *alarm = &timr->it.alarm.alarmtimer; + + timr->it_overrun += alarm_forward_now(alarm, timr->it_interval); + alarm_start(alarm, alarm->node.expires); +} + +/** + * alarm_timer_forward - Posix timer callback for forwarding timer + * @timr: Pointer to the posixtimer data struct + * @now: Current time to forward the timer against + */ +static int alarm_timer_forward(struct k_itimer *timr, ktime_t now) +{ + struct alarm *alarm = &timr->it.alarm.alarmtimer; + + return (int) alarm_forward(alarm, timr->it_interval, now); +} + +/** + * alarm_timer_remaining - Posix timer callback to retrieve remaining time + * @timr: Pointer to the posixtimer data struct + * @now: Current time to calculate against + */ +static ktime_t alarm_timer_remaining(struct k_itimer *timr, ktime_t now) +{ + struct alarm *alarm = &timr->it.alarm.alarmtimer; + + return ktime_sub(now, alarm->node.expires); +} + +/** + * alarm_timer_try_to_cancel - Posix timer callback to cancel a timer + * @timr: Pointer to the posixtimer data struct + */ +static int alarm_timer_try_to_cancel(struct k_itimer *timr) +{ + return alarm_try_to_cancel(&timr->it.alarm.alarmtimer); +} + +/** + * alarm_timer_arm - Posix timer callback to arm a timer + * @timr: Pointer to the posixtimer data struct + * @expires: The new expiry time + * @absolute: Expiry value is absolute time + * @sigev_none: Posix timer does not deliver signals + */ +static void alarm_timer_arm(struct k_itimer *timr, ktime_t expires, + bool absolute, bool sigev_none) +{ + struct 
alarm *alarm = &timr->it.alarm.alarmtimer; + struct alarm_base *base = &alarm_bases[alarm->type]; + + if (!absolute) + expires = ktime_add_safe(expires, base->gettime()); + if (sigev_none) + alarm->node.expires = expires; + else + alarm_start(&timr->it.alarm.alarmtimer, expires); +} + +/** * alarm_clock_getres - posix getres interface * @which_clock: clockid * @tp: timespec to fill * * Returns the granularity of underlying alarm base clock */ -static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) +static int alarm_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { if (!alarmtimer_get_rtcdev()) return -EINVAL; @@ -556,14 +633,14 @@ static int alarm_clock_getres(const clockid_t which_clock, struct timespec *tp) * * Provides the underlying alarm base time. */ -static int alarm_clock_get(clockid_t which_clock, struct timespec *tp) +static int alarm_clock_get(clockid_t which_clock, struct timespec64 *tp) { struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; if (!alarmtimer_get_rtcdev()) return -EINVAL; - *tp = ktime_to_timespec(base->gettime()); + *tp = ktime_to_timespec64(base->gettime()); return 0; } @@ -589,89 +666,6 @@ static int alarm_timer_create(struct k_itimer *new_timer) } /** - * alarm_timer_get - posix timer_get interface - * @new_timer: k_itimer pointer - * @cur_setting: itimerspec data to fill - * - * Copies out the current itimerspec data - */ -static void alarm_timer_get(struct k_itimer *timr, - struct itimerspec *cur_setting) -{ - ktime_t relative_expiry_time = - alarm_expires_remaining(&(timr->it.alarm.alarmtimer)); - - if (ktime_to_ns(relative_expiry_time) > 0) { - cur_setting->it_value = ktime_to_timespec(relative_expiry_time); - } else { - cur_setting->it_value.tv_sec = 0; - cur_setting->it_value.tv_nsec = 0; - } - - cur_setting->it_interval = ktime_to_timespec(timr->it.alarm.interval); -} - -/** - * alarm_timer_del - posix timer_del interface - * @timr: k_itimer pointer to be deleted - * - * Cancels any programmed alarms for the given timer. - */ -static int alarm_timer_del(struct k_itimer *timr) -{ - if (!rtcdev) - return -ENOTSUPP; - - if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0) - return TIMER_RETRY; - - return 0; -} - -/** - * alarm_timer_set - posix timer_set interface - * @timr: k_itimer pointer to be deleted - * @flags: timer flags - * @new_setting: itimerspec to be used - * @old_setting: itimerspec being replaced - * - * Sets the timer to new_setting, and starts the timer. 
- */ -static int alarm_timer_set(struct k_itimer *timr, int flags, - struct itimerspec *new_setting, - struct itimerspec *old_setting) -{ - ktime_t exp; - - if (!rtcdev) - return -ENOTSUPP; - - if (flags & ~TIMER_ABSTIME) - return -EINVAL; - - if (old_setting) - alarm_timer_get(timr, old_setting); - - /* If the timer was already set, cancel it */ - if (alarm_try_to_cancel(&timr->it.alarm.alarmtimer) < 0) - return TIMER_RETRY; - - /* start the timer */ - timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval); - exp = timespec_to_ktime(new_setting->it_value); - /* Convert (if necessary) to absolute time */ - if (flags != TIMER_ABSTIME) { - ktime_t now; - - now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime(); - exp = ktime_add(now, exp); - } - - alarm_start(&timr->it.alarm.alarmtimer, exp); - return 0; -} - -/** * alarmtimer_nsleep_wakeup - Wakeup function for alarm_timer_nsleep * @alarm: ptr to alarm that fired * @@ -695,8 +689,10 @@ static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm, * * Sets the alarm timer and sleeps until it is fired or interrupted. */ -static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp) +static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp, + enum alarmtimer_type type) { + struct restart_block *restart; alarm->data = (void *)current; do { set_current_state(TASK_INTERRUPTIBLE); @@ -709,36 +705,25 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp) __set_current_state(TASK_RUNNING); - return (alarm->data == NULL); -} - - -/** - * update_rmtp - Update remaining timespec value - * @exp: expiration time - * @type: timer type - * @rmtp: user pointer to remaining timepsec value - * - * Helper function that fills in rmtp value with time between - * now and the exp value - */ -static int update_rmtp(ktime_t exp, enum alarmtimer_type type, - struct timespec __user *rmtp) -{ - struct timespec rmt; - ktime_t rem; - - rem = ktime_sub(exp, alarm_bases[type].gettime()); - - if (rem <= 0) + if (!alarm->data) return 0; - rmt = ktime_to_timespec(rem); - if (copy_to_user(rmtp, &rmt, sizeof(*rmtp))) - return -EFAULT; + if (freezing(current)) + alarmtimer_freezerset(absexp, type); + restart = &current->restart_block; + if (restart->nanosleep.type != TT_NONE) { + struct timespec64 rmt; + ktime_t rem; + + rem = ktime_sub(absexp, alarm_bases[type].gettime()); - return 1; + if (rem <= 0) + return 0; + rmt = ktime_to_timespec64(rem); + return nanosleep_copyout(restart, &rmt); + } + return -ERESTART_RESTARTBLOCK; } /** @@ -750,32 +735,12 @@ static int update_rmtp(ktime_t exp, enum alarmtimer_type type, static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) { enum alarmtimer_type type = restart->nanosleep.clockid; - ktime_t exp; - struct timespec __user *rmtp; + ktime_t exp = restart->nanosleep.expires; struct alarm alarm; - int ret = 0; - exp = restart->nanosleep.expires; alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); - if (alarmtimer_do_nsleep(&alarm, exp)) - goto out; - - if (freezing(current)) - alarmtimer_freezerset(exp, type); - - rmtp = restart->nanosleep.rmtp; - if (rmtp) { - ret = update_rmtp(exp, type, rmtp); - if (ret <= 0) - goto out; - } - - - /* The other values in restart are already filled in */ - ret = -ERESTART_RESTARTBLOCK; -out: - return ret; + return alarmtimer_do_nsleep(&alarm, exp, type); } /** @@ -788,13 +753,13 @@ out: * Handles clock_nanosleep calls against _ALARM clockids */ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, -
struct timespec *tsreq, struct timespec __user *rmtp) + const struct timespec64 *tsreq) { enum alarmtimer_type type = clock2alarm(which_clock); + struct restart_block *restart = &current->restart_block; struct alarm alarm; ktime_t exp; int ret = 0; - struct restart_block *restart; if (!alarmtimer_get_rtcdev()) return -ENOTSUPP; @@ -807,42 +772,43 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, alarm_init(&alarm, type, alarmtimer_nsleep_wakeup); - exp = timespec_to_ktime(*tsreq); + exp = timespec64_to_ktime(*tsreq); /* Convert (if necessary) to absolute time */ if (flags != TIMER_ABSTIME) { ktime_t now = alarm_bases[type].gettime(); exp = ktime_add(now, exp); } - if (alarmtimer_do_nsleep(&alarm, exp)) - goto out; - - if (freezing(current)) - alarmtimer_freezerset(exp, type); + ret = alarmtimer_do_nsleep(&alarm, exp, type); + if (ret != -ERESTART_RESTARTBLOCK) + return ret; /* abs timers don't set remaining time or restart */ - if (flags == TIMER_ABSTIME) { - ret = -ERESTARTNOHAND; - goto out; - } + if (flags == TIMER_ABSTIME) + return -ERESTARTNOHAND; - if (rmtp) { - ret = update_rmtp(exp, type, rmtp); - if (ret <= 0) - goto out; - } - - restart = &current->restart_block; restart->fn = alarm_timer_nsleep_restart; restart->nanosleep.clockid = type; restart->nanosleep.expires = exp; - restart->nanosleep.rmtp = rmtp; - ret = -ERESTART_RESTARTBLOCK; - -out: return ret; } +const struct k_clock alarm_clock = { + .clock_getres = alarm_clock_getres, + .clock_get = alarm_clock_get, + .timer_create = alarm_timer_create, + .timer_set = common_timer_set, + .timer_del = common_timer_del, + .timer_get = common_timer_get, + .timer_arm = alarm_timer_arm, + .timer_rearm = alarm_timer_rearm, + .timer_forward = alarm_timer_forward, + .timer_remaining = alarm_timer_remaining, + .timer_try_to_cancel = alarm_timer_try_to_cancel, + .nsleep = alarm_timer_nsleep, +}; +#endif /* CONFIG_POSIX_TIMERS */ + /* Suspend hook structures */ static const struct dev_pm_ops alarmtimer_pm_ops = { @@ -868,23 +834,9 @@ static int __init alarmtimer_init(void) struct platform_device *pdev; int error = 0; int i; - struct k_clock alarm_clock = { - .clock_getres = alarm_clock_getres, - .clock_get = alarm_clock_get, - .timer_create = alarm_timer_create, - .timer_set = alarm_timer_set, - .timer_del = alarm_timer_del, - .timer_get = alarm_timer_get, - .nsleep = alarm_timer_nsleep, - }; alarmtimer_rtc_timer_init(); - if (IS_ENABLED(CONFIG_POSIX_TIMERS)) { - posix_timers_register_clock(CLOCK_REALTIME_ALARM, &alarm_clock); - posix_timers_register_clock(CLOCK_BOOTTIME_ALARM, &alarm_clock); - } - /* Initialize alarm bases */ alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real; diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 97ac0951f164..4237e0744e26 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -468,7 +468,7 @@ void clockevents_register_device(struct clock_event_device *dev) } EXPORT_SYMBOL_GPL(clockevents_register_device); -void clockevents_config(struct clock_event_device *dev, u32 freq) +static void clockevents_config(struct clock_event_device *dev, u32 freq) { u64 sec; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 665985b0a89a..03918a19cf2d 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -141,6 +141,10 @@ static void __clocksource_unstable(struct clocksource *cs) { cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); cs->flags |=
CLOCK_SOURCE_UNSTABLE; + + if (cs->mark_unstable) + cs->mark_unstable(cs); + if (finished_booting) schedule_work(&watchdog_work); } @@ -229,6 +233,9 @@ static void clocksource_watchdog(unsigned long data) continue; } + if (cs == curr_clocksource && cs->tick_stable) + cs->tick_stable(cs); + if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) && (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) { diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index c6ecedd3b839..88f75f92ef36 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -43,12 +43,15 @@ #include <linux/seq_file.h> #include <linux/err.h> #include <linux/debugobjects.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/sched/sysctl.h> #include <linux/sched/rt.h> #include <linux/sched/deadline.h> +#include <linux/sched/nohz.h> +#include <linux/sched/debug.h> #include <linux/timer.h> #include <linux/freezer.h> +#include <linux/compat.h> #include <linux/uaccess.h> @@ -94,17 +97,15 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = }; static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { + /* Make sure we catch unsupported clockids */ + [0 ... MAX_CLOCKS - 1] = HRTIMER_MAX_CLOCK_BASES, + [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, [CLOCK_TAI] = HRTIMER_BASE_TAI, }; -static inline int hrtimer_clockid_to_base(clockid_t clock_id) -{ - return hrtimer_clock_to_base_table[clock_id]; -} - /* * Functions and macros which are different for UP/SMP systems are kept in a * single place @@ -766,34 +767,6 @@ void hrtimers_resume(void) clock_was_set_delayed(); } -static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) -{ -#ifdef CONFIG_TIMER_STATS - if (timer->start_site) - return; - timer->start_site = __builtin_return_address(0); - memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); - timer->start_pid = current->pid; -#endif -} - -static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) -{ -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; -#endif -} - -static inline void timer_stats_account_hrtimer(struct hrtimer *timer) -{ -#ifdef CONFIG_TIMER_STATS - if (likely(!timer_stats_active)) - return; - timer_stats_update_stats(timer, timer->start_pid, timer->start_site, - timer->function, timer->start_comm, 0); -#endif -} - /* * Counterpart to lock_hrtimer_base above: */ @@ -932,7 +905,6 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest * rare case and less expensive than a smp call. 
*/ debug_deactivate(timer); - timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); if (!restart) @@ -990,8 +962,6 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, /* Switch the timer base, if necessary: */ new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); - timer_stats_hrtimer_set_start_info(timer); - leftmost = enqueue_hrtimer(timer, new_base); if (!leftmost) goto unlock; @@ -1018,7 +988,7 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); * Returns: * 0 when the timer was not active * 1 when the timer was active - * -1 when the timer is currently excuting the callback function and + * -1 when the timer is currently executing the callback function and * cannot be stopped */ int hrtimer_try_to_cancel(struct hrtimer *timer) @@ -1112,6 +1082,18 @@ u64 hrtimer_get_next_event(void) } #endif +static inline int hrtimer_clockid_to_base(clockid_t clock_id) +{ + if (likely(clock_id < MAX_CLOCKS)) { + int base = hrtimer_clock_to_base_table[clock_id]; + + if (likely(base != HRTIMER_MAX_CLOCK_BASES)) + return base; + } + WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); + return HRTIMER_BASE_MONOTONIC; +} + static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { @@ -1128,12 +1110,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, base = hrtimer_clockid_to_base(clock_id); timer->base = &cpu_base->clock_base[base]; timerqueue_init(&timer->node); - -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; - timer->start_pid = -1; - memset(timer->start_comm, 0, TASK_COMM_LEN); -#endif } /** @@ -1217,7 +1193,6 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, raw_write_seqcount_barrier(&cpu_base->seq); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); - timer_stats_account_hrtimer(timer); fn = timer->function; /* @@ -1394,10 +1369,7 @@ retry: ktime_to_ns(delta)); } -/* - * local version of hrtimer_peek_ahead_timers() called with interrupts - * disabled. 
- */ +/* called with interrupts disabled */ static inline void __hrtimer_peek_ahead_timers(void) { struct tick_device *td; @@ -1468,8 +1440,29 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) } EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); +int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) +{ + switch(restart->nanosleep.type) { +#ifdef CONFIG_COMPAT + case TT_COMPAT: + if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp)) + return -EFAULT; + break; +#endif + case TT_NATIVE: + if (put_timespec64(ts, restart->nanosleep.rmtp)) + return -EFAULT; + break; + default: + BUG(); + } + return -ERESTART_RESTARTBLOCK; +} + static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) { + struct restart_block *restart; + hrtimer_init_sleeper(t, current); do { @@ -1486,53 +1479,38 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mod __set_current_state(TASK_RUNNING); - return t->task == NULL; -} - -static int update_rmtp(struct hrtimer *timer, struct timespec __user *rmtp) -{ - struct timespec rmt; - ktime_t rem; - - rem = hrtimer_expires_remaining(timer); - if (rem <= 0) + if (!t->task) return 0; - rmt = ktime_to_timespec(rem); - if (copy_to_user(rmtp, &rmt, sizeof(*rmtp))) - return -EFAULT; + restart = &current->restart_block; + if (restart->nanosleep.type != TT_NONE) { + ktime_t rem = hrtimer_expires_remaining(&t->timer); + struct timespec64 rmt; + + if (rem <= 0) + return 0; + rmt = ktime_to_timespec64(rem); - return 1; + return nanosleep_copyout(restart, &rmt); + } + return -ERESTART_RESTARTBLOCK; } -long __sched hrtimer_nanosleep_restart(struct restart_block *restart) +static long __sched hrtimer_nanosleep_restart(struct restart_block *restart) { struct hrtimer_sleeper t; - struct timespec __user *rmtp; - int ret = 0; + int ret; hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid, HRTIMER_MODE_ABS); hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); - if (do_nanosleep(&t, HRTIMER_MODE_ABS)) - goto out; - - rmtp = restart->nanosleep.rmtp; - if (rmtp) { - ret = update_rmtp(&t.timer, rmtp); - if (ret <= 0) - goto out; - } - - /* The other values in restart are already filled in */ - ret = -ERESTART_RESTARTBLOCK; -out: + ret = do_nanosleep(&t, HRTIMER_MODE_ABS); destroy_hrtimer_on_stack(&t.timer); return ret; } -long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, +long hrtimer_nanosleep(const struct timespec64 *rqtp, const enum hrtimer_mode mode, const clockid_t clockid) { struct restart_block *restart; @@ -1545,8 +1523,9 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, slack = 0; hrtimer_init_on_stack(&t.timer, clockid, mode); - hrtimer_set_expires_range_ns(&t.timer, timespec_to_ktime(*rqtp), slack); - if (do_nanosleep(&t, mode)) + hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack); + ret = do_nanosleep(&t, mode); + if (ret != -ERESTART_RESTARTBLOCK) goto out; /* Absolute timers do not update the rmtp value and restart: */ @@ -1555,19 +1534,10 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp, goto out; } - if (rmtp) { - ret = update_rmtp(&t.timer, rmtp); - if (ret <= 0) - goto out; - } - restart = &current->restart_block; restart->fn = hrtimer_nanosleep_restart; restart->nanosleep.clockid = t.timer.base->clockid; - restart->nanosleep.rmtp = rmtp; restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); - - ret = -ERESTART_RESTARTBLOCK; out:
destroy_hrtimer_on_stack(&t.timer); return ret; @@ -1576,16 +1546,37 @@ out: SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, struct timespec __user *, rmtp) { - struct timespec tu; + struct timespec64 tu; + + if (get_timespec64(&tu, rqtp)) + return -EFAULT; + + if (!timespec64_valid(&tu)) + return -EINVAL; + + current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; + current->restart_block.nanosleep.rmtp = rmtp; + return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC); +} + +#ifdef CONFIG_COMPAT + +COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp, + struct compat_timespec __user *, rmtp) +{ + struct timespec64 tu; - if (copy_from_user(&tu, rqtp, sizeof(tu))) + if (compat_get_timespec64(&tu, rqtp)) return -EFAULT; - if (!timespec_valid(&tu)) + if (!timespec64_valid(&tu)) return -EINVAL; - return hrtimer_nanosleep(&tu, rmtp, HRTIMER_MODE_REL, CLOCK_MONOTONIC); + current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; + current->restart_block.nanosleep.compat_rmtp = rmtp; + return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC); } +#endif /* * Functions related to boot-time initialization: diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 8c89143f9ebf..2ef98a02376a 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -10,9 +10,12 @@ #include <linux/interrupt.h> #include <linux/syscalls.h> #include <linux/time.h> +#include <linux/sched/signal.h> +#include <linux/sched/cputime.h> #include <linux/posix-timers.h> #include <linux/hrtimer.h> #include <trace/events/timer.h> +#include <linux/compat.h> #include <linux/uaccess.h> @@ -45,16 +48,16 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer) static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, struct itimerval *const value) { - cputime_t cval, cinterval; + u64 val, interval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; spin_lock_irq(&tsk->sighand->siglock); - cval = it->expires; - cinterval = it->incr; - if (cval) { + val = it->expires; + interval = it->incr; + if (val) { struct task_cputime cputime; - cputime_t t; + u64 t; thread_group_cputimer(tsk, &cputime); if (clock_id == CPUCLOCK_PROF) @@ -63,17 +66,17 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, /* CPUCLOCK_VIRT */ t = cputime.utime; - if (cval < t) + if (val < t) /* about to fire */ - cval = cputime_one_jiffy; + val = TICK_NSEC; else - cval = cval - t; + val -= t; } spin_unlock_irq(&tsk->sighand->siglock); - cputime_to_timeval(cval, &value->it_value); - cputime_to_timeval(cinterval, &value->it_interval); + value->it_value = ns_to_timeval(val); + value->it_interval = ns_to_timeval(interval); } int do_getitimer(int which, struct itimerval *value) @@ -114,6 +117,19 @@ SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value) return error; } +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE2(getitimer, int, which, + struct compat_itimerval __user *, it) +{ + struct itimerval kit; + int error = do_getitimer(which, &kit); + + if (!error && put_compat_itimerval(it, &kit)) + error = -EFAULT; + return error; +} +#endif + /* * The timer is automagically restarted, when interval != 0 @@ -129,55 +145,39 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } -static inline u32 cputime_sub_ns(cputime_t ct, s64 real_ns) -{ - struct timespec ts; - s64 cpu_ns; - - cputime_to_timespec(ct, &ts); - cpu_ns = timespec_to_ns(&ts); - - return (cpu_ns <= real_ns) ? 
0 : cpu_ns - real_ns; -} - static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, const struct itimerval *const value, struct itimerval *const ovalue) { - cputime_t cval, nval, cinterval, ninterval; - s64 ns_ninterval, ns_nval; - u32 error, incr_error; + u64 oval, nval, ointerval, ninterval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; - nval = timeval_to_cputime(&value->it_value); - ns_nval = timeval_to_ns(&value->it_value); - ninterval = timeval_to_cputime(&value->it_interval); - ns_ninterval = timeval_to_ns(&value->it_interval); - - error = cputime_sub_ns(nval, ns_nval); - incr_error = cputime_sub_ns(ninterval, ns_ninterval); + /* + * Use the to_ktime conversion because that clamps the maximum + * value to KTIME_MAX and avoid multiplication overflows. + */ + nval = ktime_to_ns(timeval_to_ktime(value->it_value)); + ninterval = ktime_to_ns(timeval_to_ktime(value->it_interval)); spin_lock_irq(&tsk->sighand->siglock); - cval = it->expires; - cinterval = it->incr; - if (cval || nval) { + oval = it->expires; + ointerval = it->incr; + if (oval || nval) { if (nval > 0) - nval += cputime_one_jiffy; - set_process_cpu_timer(tsk, clock_id, &nval, &cval); + nval += TICK_NSEC; + set_process_cpu_timer(tsk, clock_id, &nval, &oval); } it->expires = nval; it->incr = ninterval; - it->error = error; - it->incr_error = incr_error; trace_itimer_state(clock_id == CPUCLOCK_VIRT ? ITIMER_VIRTUAL : ITIMER_PROF, value, nval); spin_unlock_irq(&tsk->sighand->siglock); if (ovalue) { - cputime_to_timeval(cval, &ovalue->it_value); - cputime_to_timeval(cinterval, &ovalue->it_interval); + ovalue->it_value = ns_to_timeval(oval); + ovalue->it_interval = ns_to_timeval(ointerval); } } @@ -312,3 +312,27 @@ SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value, return -EFAULT; return 0; } + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE3(setitimer, int, which, + struct compat_itimerval __user *, in, + struct compat_itimerval __user *, out) +{ + struct itimerval kin, kout; + int error; + + if (in) { + if (get_compat_itimerval(&kin, in)) + return -EFAULT; + } else { + memset(&kin, 0, sizeof(kin)); + } + + error = do_setitimer(which, &kin, out ? &kout : NULL); + if (error || !out) + return error; + if (put_compat_itimerval(out, &kout)) + return -EFAULT; + return 0; +} +#endif diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index a4a0e478e44d..497719127bf9 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -27,19 +27,8 @@ #include "timekeeping.h" -/* The Jiffies based clocksource is the lowest common - * denominator clock source which should function on - * all systems. It has the same coarse resolution as - * the timer interrupt frequency HZ and it suffers - * inaccuracies caused by missed or lost timer - * interrupts and the inability for the timer - * interrupt hardware to accuratly tick at the - * requested HZ value. It is also not recommended - * for "tick-less" systems. - */ -#define NSEC_PER_JIFFY ((NSEC_PER_SEC+HZ/2)/HZ) -/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier +/* Since jiffies uses a simple TICK_NSEC multiplier * conversion, the .shift value could be zero. However * this would make NTP adjustments impossible as they are * in units of 1/2^.shift. Thus we use JIFFIES_SHIFT to @@ -47,8 +36,8 @@ * amount, and give ntp adjustments in units of 1/2^8 * * The value 8 is somewhat carefully chosen, as anything - * larger can result in overflows. 
NSEC_PER_JIFFY grows as - * HZ shrinks, so values greater than 8 overflow 32bits when + * larger can result in overflows. TICK_NSEC grows as HZ + * shrinks, so values greater than 8 overflow 32bits when * HZ=100. */ #if HZ < 34 @@ -64,12 +53,23 @@ static u64 jiffies_read(struct clocksource *cs) return (u64) jiffies; } +/* + * The Jiffies based clocksource is the lowest common + * denominator clock source which should function on + * all systems. It has the same coarse resolution as + * the timer interrupt frequency HZ and it suffers + * inaccuracies caused by missed or lost timer + * interrupts and the inability for the timer + * interrupt hardware to accuratly tick at the + * requested HZ value. It is also not recommended + * for "tick-less" systems. + */ static struct clocksource clocksource_jiffies = { .name = "jiffies", .rating = 1, /* lowest valid rating*/ .read = jiffies_read, .mask = CLOCKSOURCE_MASK(32), - .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ + .mult = TICK_NSEC << JIFFIES_SHIFT, /* details above */ .shift = JIFFIES_SHIFT, .max_cycles = 10, }; diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 9cff0ab82b63..17cdc554c9fe 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -25,6 +25,8 @@ #include <linux/syscalls.h> #include <linux/uaccess.h> +#include "posix-timers.h" + static void delete_clock(struct kref *kref); /* @@ -82,38 +84,6 @@ static unsigned int posix_clock_poll(struct file *fp, poll_table *wait) return result; } -static int posix_clock_fasync(int fd, struct file *fp, int on) -{ - struct posix_clock *clk = get_posix_clock(fp); - int err = 0; - - if (!clk) - return -ENODEV; - - if (clk->ops.fasync) - err = clk->ops.fasync(clk, fd, fp, on); - - put_posix_clock(clk); - - return err; -} - -static int posix_clock_mmap(struct file *fp, struct vm_area_struct *vma) -{ - struct posix_clock *clk = get_posix_clock(fp); - int err = -ENODEV; - - if (!clk) - return -ENODEV; - - if (clk->ops.mmap) - err = clk->ops.mmap(clk, vma); - - put_posix_clock(clk); - - return err; -} - static long posix_clock_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) { @@ -199,8 +169,6 @@ static const struct file_operations posix_clock_file_operations = { .unlocked_ioctl = posix_clock_ioctl, .open = posix_clock_open, .release = posix_clock_release, - .fasync = posix_clock_fasync, - .mmap = posix_clock_mmap, #ifdef CONFIG_COMPAT .compat_ioctl = posix_clock_compat_ioctl, #endif @@ -297,7 +265,7 @@ out: return err; } -static int pc_clock_gettime(clockid_t id, struct timespec *ts) +static int pc_clock_gettime(clockid_t id, struct timespec64 *ts) { struct posix_clock_desc cd; int err; @@ -316,7 +284,7 @@ static int pc_clock_gettime(clockid_t id, struct timespec *ts) return err; } -static int pc_clock_getres(clockid_t id, struct timespec *ts) +static int pc_clock_getres(clockid_t id, struct timespec64 *ts) { struct posix_clock_desc cd; int err; @@ -335,7 +303,7 @@ static int pc_clock_getres(clockid_t id, struct timespec *ts) return err; } -static int pc_clock_settime(clockid_t id, const struct timespec *ts) +static int pc_clock_settime(clockid_t id, const struct timespec64 *ts) { struct posix_clock_desc cd; int err; @@ -359,88 +327,9 @@ out: return err; } -static int pc_timer_create(struct k_itimer *kit) -{ - clockid_t id = kit->it_clock; - struct posix_clock_desc cd; - int err; - - err = get_clock_desc(id, &cd); - if (err) - return err; - - if (cd.clk->ops.timer_create) - err = cd.clk->ops.timer_create(cd.clk, kit); - else - err = 
-EOPNOTSUPP; - - put_clock_desc(&cd); - - return err; -} - -static int pc_timer_delete(struct k_itimer *kit) -{ - clockid_t id = kit->it_clock; - struct posix_clock_desc cd; - int err; - - err = get_clock_desc(id, &cd); - if (err) - return err; - - if (cd.clk->ops.timer_delete) - err = cd.clk->ops.timer_delete(cd.clk, kit); - else - err = -EOPNOTSUPP; - - put_clock_desc(&cd); - - return err; -} - -static void pc_timer_gettime(struct k_itimer *kit, struct itimerspec *ts) -{ - clockid_t id = kit->it_clock; - struct posix_clock_desc cd; - - if (get_clock_desc(id, &cd)) - return; - - if (cd.clk->ops.timer_gettime) - cd.clk->ops.timer_gettime(cd.clk, kit, ts); - - put_clock_desc(&cd); -} - -static int pc_timer_settime(struct k_itimer *kit, int flags, - struct itimerspec *ts, struct itimerspec *old) -{ - clockid_t id = kit->it_clock; - struct posix_clock_desc cd; - int err; - - err = get_clock_desc(id, &cd); - if (err) - return err; - - if (cd.clk->ops.timer_settime) - err = cd.clk->ops.timer_settime(cd.clk, kit, flags, ts, old); - else - err = -EOPNOTSUPP; - - put_clock_desc(&cd); - - return err; -} - -struct k_clock clock_posix_dynamic = { +const struct k_clock clock_posix_dynamic = { .clock_getres = pc_clock_getres, .clock_set = pc_clock_settime, .clock_get = pc_clock_gettime, .clock_adj = pc_clock_adjtime, - .timer_create = pc_timer_create, - .timer_set = pc_timer_settime, - .timer_del = pc_timer_delete, - .timer_get = pc_timer_gettime, }; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index e9e8c10f0d9a..a3bd5dbe0dc4 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -2,7 +2,8 @@ * Implement CPU time clocks for the POSIX clock interface. */ -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/cputime.h> #include <linux/posix-timers.h> #include <linux/errno.h> #include <linux/math64.h> @@ -11,6 +12,11 @@ #include <trace/events/timer.h> #include <linux/tick.h> #include <linux/workqueue.h> +#include <linux/compat.h> + +#include "posix-timers.h" + +static void posix_cpu_timer_rearm(struct k_itimer *timer); /* * Called after updating RLIMIT_CPU to run cpu timer and update @@ -20,10 +26,10 @@ */ void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new) { - cputime_t cputime = secs_to_cputime(rlim_new); + u64 nsecs = rlim_new * NSEC_PER_SEC; spin_lock_irq(&task->sighand->siglock); - set_process_cpu_timer(task, CPUCLOCK_PROF, &cputime, NULL); + set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL); spin_unlock_irq(&task->sighand->siglock); } @@ -50,39 +56,14 @@ static int check_clock(const clockid_t which_clock) return error; } -static inline unsigned long long -timespec_to_sample(const clockid_t which_clock, const struct timespec *tp) -{ - unsigned long long ret; - - ret = 0; /* high half always zero when .cpu used */ - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) { - ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec; - } else { - ret = cputime_to_expires(timespec_to_cputime(tp)); - } - return ret; -} - -static void sample_to_timespec(const clockid_t which_clock, - unsigned long long expires, - struct timespec *tp) -{ - if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) - *tp = ns_to_timespec(expires); - else - cputime_to_timespec((__force cputime_t)expires, tp); -} - /* * Update expiry time from increment, and increase overrun count, * given the current clock sample. 
*/ -static void bump_cpu_timer(struct k_itimer *timer, - unsigned long long now) +static void bump_cpu_timer(struct k_itimer *timer, u64 now) { int i; - unsigned long long delta, incr; + u64 delta, incr; if (timer->it.cpu.incr == 0) return; @@ -122,25 +103,25 @@ static inline int task_cputime_zero(const struct task_cputime *cputime) return 0; } -static inline unsigned long long prof_ticks(struct task_struct *p) +static inline u64 prof_ticks(struct task_struct *p) { - cputime_t utime, stime; + u64 utime, stime; task_cputime(p, &utime, &stime); - return cputime_to_expires(utime + stime); + return utime + stime; } -static inline unsigned long long virt_ticks(struct task_struct *p) +static inline u64 virt_ticks(struct task_struct *p) { - cputime_t utime, stime; + u64 utime, stime; task_cputime(p, &utime, &stime); - return cputime_to_expires(utime); + return utime; } static int -posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) +posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { int error = check_clock(which_clock); if (!error) { @@ -159,7 +140,7 @@ posix_cpu_clock_getres(const clockid_t which_clock, struct timespec *tp) } static int -posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) +posix_cpu_clock_set(const clockid_t which_clock, const struct timespec64 *tp) { /* * You can never reset a CPU clock, but we check for other errors @@ -176,8 +157,8 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp) /* * Sample a per-thread clock for the given task. */ -static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, - unsigned long long *sample) +static int cpu_clock_sample(const clockid_t which_clock, + struct task_struct *p, u64 *sample) { switch (CPUCLOCK_WHICH(which_clock)) { default: @@ -260,7 +241,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) */ static int cpu_clock_sample_group(const clockid_t which_clock, struct task_struct *p, - unsigned long long *sample) + u64 *sample) { struct task_cputime cputime; @@ -269,11 +250,11 @@ static int cpu_clock_sample_group(const clockid_t which_clock, return -EINVAL; case CPUCLOCK_PROF: thread_group_cputime(p, &cputime); - *sample = cputime_to_expires(cputime.utime + cputime.stime); + *sample = cputime.utime + cputime.stime; break; case CPUCLOCK_VIRT: thread_group_cputime(p, &cputime); - *sample = cputime_to_expires(cputime.utime); + *sample = cputime.utime; break; case CPUCLOCK_SCHED: thread_group_cputime(p, &cputime); @@ -285,10 +266,10 @@ static int cpu_clock_sample_group(const clockid_t which_clock, static int posix_cpu_clock_get_task(struct task_struct *tsk, const clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { int err = -EINVAL; - unsigned long long rtn; + u64 rtn; if (CPUCLOCK_PERTHREAD(which_clock)) { if (same_thread_group(tsk, current)) @@ -299,13 +280,13 @@ static int posix_cpu_clock_get_task(struct task_struct *tsk, } if (!err) - sample_to_timespec(which_clock, rtn, tp); + *tp = ns_to_timespec64(rtn); return err; } -static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) +static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec64 *tp) { const pid_t pid = CPUCLOCK_PID(which_clock); int err = -EINVAL; @@ -346,6 +327,8 @@ static int posix_cpu_timer_create(struct k_itimer *new_timer) if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX) return -EINVAL; + new_timer->kclock = &clock_posix_cpu; + 
INIT_LIST_HEAD(&new_timer->it.cpu.entry); rcu_read_lock(); @@ -453,7 +436,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk) cleanup_timers(tsk->signal->cpu_timers); } -static inline int expires_gt(cputime_t expires, cputime_t new_exp) +static inline int expires_gt(u64 expires, u64 new_exp) { return expires == 0 || expires > new_exp; } @@ -488,7 +471,7 @@ static void arm_timer(struct k_itimer *timer) list_add(&nt->entry, listpos); if (listpos == head) { - unsigned long long exp = nt->expires; + u64 exp = nt->expires; /* * We are the new earliest-expiring POSIX 1.b timer, hence @@ -499,16 +482,15 @@ static void arm_timer(struct k_itimer *timer) switch (CPUCLOCK_WHICH(timer->it_clock)) { case CPUCLOCK_PROF: - if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp))) - cputime_expires->prof_exp = expires_to_cputime(exp); + if (expires_gt(cputime_expires->prof_exp, exp)) + cputime_expires->prof_exp = exp; break; case CPUCLOCK_VIRT: - if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp))) - cputime_expires->virt_exp = expires_to_cputime(exp); + if (expires_gt(cputime_expires->virt_exp, exp)) + cputime_expires->virt_exp = exp; break; case CPUCLOCK_SCHED: - if (cputime_expires->sched_exp == 0 || - cputime_expires->sched_exp > exp) + if (expires_gt(cputime_expires->sched_exp, exp)) cputime_expires->sched_exp = exp; break; } @@ -549,7 +531,8 @@ static void cpu_timer_fire(struct k_itimer *timer) * reload the timer. But we need to keep it * ticking in case the signal is deliverable next time. */ - posix_cpu_timer_schedule(timer); + posix_cpu_timer_rearm(timer); + ++timer->it_requeue_pending; } } @@ -559,8 +542,7 @@ static void cpu_timer_fire(struct k_itimer *timer) * traversal. */ static int cpu_timer_sample_group(const clockid_t which_clock, - struct task_struct *p, - unsigned long long *sample) + struct task_struct *p, u64 *sample) { struct task_cputime cputime; @@ -569,10 +551,10 @@ static int cpu_timer_sample_group(const clockid_t which_clock, default: return -EINVAL; case CPUCLOCK_PROF: - *sample = cputime_to_expires(cputime.utime + cputime.stime); + *sample = cputime.utime + cputime.stime; break; case CPUCLOCK_VIRT: - *sample = cputime_to_expires(cputime.utime); + *sample = cputime.utime; break; case CPUCLOCK_SCHED: *sample = cputime.sum_exec_runtime; @@ -588,17 +570,21 @@ static int cpu_timer_sample_group(const clockid_t which_clock, * and try again. (This happens when the timer is in the middle of firing.) */ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, - struct itimerspec *new, struct itimerspec *old) + struct itimerspec64 *new, struct itimerspec64 *old) { unsigned long flags; struct sighand_struct *sighand; struct task_struct *p = timer->it.cpu.task; - unsigned long long old_expires, new_expires, old_incr, val; + u64 old_expires, new_expires, old_incr, val; int ret; WARN_ON_ONCE(p == NULL); - new_expires = timespec_to_sample(timer->it_clock, &new->it_value); + /* + * Use the to_ktime conversion because that clamps the maximum + * value to KTIME_MAX and avoid multiplication overflows. 
+ */ + new_expires = ktime_to_ns(timespec64_to_ktime(new->it_value)); /* * Protect against sighand release/switch in exit/exec and p->cpu_timers @@ -659,9 +645,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, bump_cpu_timer(timer, val); if (val < timer->it.cpu.expires) { old_expires = timer->it.cpu.expires - val; - sample_to_timespec(timer->it_clock, - old_expires, - &old->it_value); + old->it_value = ns_to_timespec64(old_expires); } else { old->it_value.tv_nsec = 1; old->it_value.tv_sec = 0; @@ -699,8 +683,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, * Install the new reload setting, and * set up the signal and overrun bookkeeping. */ - timer->it.cpu.incr = timespec_to_sample(timer->it_clock, - &new->it_interval); + timer->it.cpu.incr = timespec64_to_ns(&new->it_interval); /* * This acts as a modification timestamp for the timer, @@ -723,17 +706,15 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, ret = 0; out: - if (old) { - sample_to_timespec(timer->it_clock, - old_incr, &old->it_interval); - } + if (old) + old->it_interval = ns_to_timespec64(old_incr); return ret; } -static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) +static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp) { - unsigned long long now; + u64 now; struct task_struct *p = timer->it.cpu.task; WARN_ON_ONCE(p == NULL); @@ -741,13 +722,10 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) /* * Easy part: convert the reload time. */ - sample_to_timespec(timer->it_clock, - timer->it.cpu.incr, &itp->it_interval); + itp->it_interval = ns_to_timespec64(timer->it.cpu.incr); - if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */ - itp->it_value.tv_sec = itp->it_value.tv_nsec = 0; + if (!timer->it.cpu.expires) return; - } /* * Sample the clock to take the difference with the expiry time. @@ -771,8 +749,6 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) * Call the timer disarmed, nothing else to do. */ timer->it.cpu.expires = 0; - sample_to_timespec(timer->it_clock, timer->it.cpu.expires, - &itp->it_value); return; } else { cpu_timer_sample_group(timer->it_clock, p, &now); @@ -781,9 +757,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) } if (now < timer->it.cpu.expires) { - sample_to_timespec(timer->it_clock, - timer->it.cpu.expires - now, - &itp->it_value); + itp->it_value = ns_to_timespec64(timer->it.cpu.expires - now); } else { /* * The timer should have expired already, but the firing @@ -827,7 +801,7 @@ static void check_thread_timers(struct task_struct *tsk, struct list_head *timers = tsk->cpu_timers; struct signal_struct *const sig = tsk->signal; struct task_cputime *tsk_expires = &tsk->cputime_expires; - unsigned long long expires; + u64 expires; unsigned long soft; /* @@ -838,10 +812,10 @@ static void check_thread_timers(struct task_struct *tsk, return; expires = check_timers_list(timers, firing, prof_ticks(tsk)); - tsk_expires->prof_exp = expires_to_cputime(expires); + tsk_expires->prof_exp = expires; expires = check_timers_list(++timers, firing, virt_ticks(tsk)); - tsk_expires->virt_exp = expires_to_cputime(expires); + tsk_expires->virt_exp = expires; tsk_expires->sched_exp = check_timers_list(++timers, firing, tsk->se.sum_exec_runtime); @@ -860,6 +834,10 @@ static void check_thread_timers(struct task_struct *tsk, * At the hard limit, we just die. 
* No need to calculate anything else now. */ + if (print_fatal_signals) { + pr_info("CPU Watchdog Timeout (hard): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); return; } @@ -871,9 +849,10 @@ static void check_thread_timers(struct task_struct *tsk, soft += USEC_PER_SEC; sig->rlim[RLIMIT_RTTIME].rlim_cur = soft; } - printk(KERN_INFO - "RT Watchdog Timeout: %s[%d]\n", - tsk->comm, task_pid_nr(tsk)); + if (print_fatal_signals) { + pr_info("RT Watchdog Timeout (soft): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); } } @@ -890,26 +869,17 @@ static inline void stop_process_timers(struct signal_struct *sig) tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER); } -static u32 onecputick; - static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, - unsigned long long *expires, - unsigned long long cur_time, int signo) + u64 *expires, u64 cur_time, int signo) { if (!it->expires) return; if (cur_time >= it->expires) { - if (it->incr) { + if (it->incr) it->expires += it->incr; - it->error += it->incr_error; - if (it->error >= onecputick) { - it->expires -= cputime_one_jiffy; - it->error -= onecputick; - } - } else { + else it->expires = 0; - } trace_itimer_expire(signo == SIGPROF ? ITIMER_PROF : ITIMER_VIRTUAL, @@ -917,9 +887,8 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); } - if (it->expires && (!*expires || it->expires < *expires)) { + if (it->expires && (!*expires || it->expires < *expires)) *expires = it->expires; - } } /* @@ -931,8 +900,8 @@ static void check_process_timers(struct task_struct *tsk, struct list_head *firing) { struct signal_struct *const sig = tsk->signal; - unsigned long long utime, ptime, virt_expires, prof_expires; - unsigned long long sum_sched_runtime, sched_expires; + u64 utime, ptime, virt_expires, prof_expires; + u64 sum_sched_runtime, sched_expires; struct list_head *timers = sig->cpu_timers; struct task_cputime cputime; unsigned long soft; @@ -954,8 +923,8 @@ static void check_process_timers(struct task_struct *tsk, * Collect the current process totals. */ thread_group_cputimer(tsk, &cputime); - utime = cputime_to_expires(cputime.utime); - ptime = utime + cputime_to_expires(cputime.stime); + utime = cputime.utime; + ptime = utime + cputime.stime; sum_sched_runtime = cputime.sum_exec_runtime; prof_expires = check_timers_list(timers, firing, ptime); @@ -971,15 +940,19 @@ static void check_process_timers(struct task_struct *tsk, SIGVTALRM); soft = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur); if (soft != RLIM_INFINITY) { - unsigned long psecs = cputime_to_secs(ptime); + unsigned long psecs = div_u64(ptime, NSEC_PER_SEC); unsigned long hard = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_max); - cputime_t x; + u64 x; if (psecs >= hard) { /* * At the hard limit, we just die. * No need to calculate anything else now. */ + if (print_fatal_signals) { + pr_info("RT Watchdog Timeout (hard): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk); return; } @@ -987,20 +960,23 @@ static void check_process_timers(struct task_struct *tsk, /* * At the soft limit, send a SIGXCPU every second. 
*/ + if (print_fatal_signals) { + pr_info("CPU Watchdog Timeout (soft): %s[%d]\n", + tsk->comm, task_pid_nr(tsk)); + } __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); if (soft < hard) { soft++; sig->rlim[RLIMIT_CPU].rlim_cur = soft; } } - x = secs_to_cputime(soft); - if (!prof_expires || x < prof_expires) { + x = soft * NSEC_PER_SEC; + if (!prof_expires || x < prof_expires) prof_expires = x; - } } - sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires); - sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires); + sig->cputime_expires.prof_exp = prof_expires; + sig->cputime_expires.virt_exp = virt_expires; sig->cputime_expires.sched_exp = sched_expires; if (task_cputime_zero(&sig->cputime_expires)) stop_process_timers(sig); @@ -1009,15 +985,15 @@ static void check_process_timers(struct task_struct *tsk, } /* - * This is called from the signal code (via do_schedule_next_timer) + * This is called from the signal code (via posixtimer_rearm) * when the last timer signal was delivered and we have to reload the timer. */ -void posix_cpu_timer_schedule(struct k_itimer *timer) +static void posix_cpu_timer_rearm(struct k_itimer *timer) { struct sighand_struct *sighand; unsigned long flags; struct task_struct *p = timer->it.cpu.task; - unsigned long long now; + u64 now; WARN_ON_ONCE(p == NULL); @@ -1028,12 +1004,12 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) cpu_clock_sample(timer->it_clock, p, &now); bump_cpu_timer(timer, now); if (unlikely(p->exit_state)) - goto out; + return; /* Protect timer list r/w in arm_timer() */ sighand = lock_task_sighand(p, &flags); if (!sighand) - goto out; + return; } else { /* * Protect arm_timer() and timer sampling in case of call to @@ -1046,11 +1022,10 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) * We can't even collect a sample any more. */ timer->it.cpu.expires = 0; - goto out; + return; } else if (unlikely(p->exit_state) && thread_group_empty(p)) { - unlock_task_sighand(p, &flags); - /* Optimizations: if the process is dying, no need to rearm */ - goto out; + /* If the process is dying, no need to rearm */ + goto unlock; } cpu_timer_sample_group(timer->it_clock, p, &now); bump_cpu_timer(timer, now); @@ -1062,12 +1037,8 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) */ WARN_ON_ONCE(!irqs_disabled()); arm_timer(timer); +unlock: unlock_task_sighand(p, &flags); - -out: - timer->it_overrun_last = timer->it_overrun; - timer->it_overrun = -1; - ++timer->it_requeue_pending; } /** @@ -1214,9 +1185,9 @@ void run_posix_cpu_timers(struct task_struct *tsk) * The tsk->sighand->siglock must be held by the caller. */ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, - cputime_t *newval, cputime_t *oldval) + u64 *newval, u64 *oldval) { - unsigned long long now; + u64 now; WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED); cpu_timer_sample_group(clock_idx, tsk, &now); @@ -1230,7 +1201,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, if (*oldval) { if (*oldval <= now) { /* Just about to fire. 
*/ - *oldval = cputime_one_jiffy; + *oldval = TICK_NSEC; } else { *oldval -= now; } @@ -1260,9 +1231,11 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, } static int do_cpu_nanosleep(const clockid_t which_clock, int flags, - struct timespec *rqtp, struct itimerspec *it) + const struct timespec64 *rqtp) { + struct itimerspec64 it; struct k_itimer timer; + u64 expires; int error; /* @@ -1275,13 +1248,14 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, error = posix_cpu_timer_create(&timer); timer.it_process = current; if (!error) { - static struct itimerspec zero_it; + static struct itimerspec64 zero_it; + struct restart_block *restart; - memset(it, 0, sizeof *it); - it->it_value = *rqtp; + memset(&it, 0, sizeof(it)); + it.it_value = *rqtp; spin_lock_irq(&timer.it_lock); - error = posix_cpu_timer_set(&timer, flags, it, NULL); + error = posix_cpu_timer_set(&timer, flags, &it, NULL); if (error) { spin_unlock_irq(&timer.it_lock); return error; } @@ -1310,8 +1284,8 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, /* * We were interrupted by a signal. */ - sample_to_timespec(which_clock, timer.it.cpu.expires, rqtp); - error = posix_cpu_timer_set(&timer, 0, &zero_it, it); + expires = timer.it.cpu.expires; + error = posix_cpu_timer_set(&timer, 0, &zero_it, &it); if (!error) { /* * Timer is now unarmed, deletion can not fail. */ @@ -1331,7 +1305,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, spin_unlock_irq(&timer.it_lock); } - if ((it->it_value.tv_sec | it->it_value.tv_nsec) == 0) { + if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) { /* * It actually did fire already. */ } error = -ERESTART_RESTARTBLOCK; + /* + * Report back to the user the time still remaining. + */ + restart = &current->restart_block; + restart->nanosleep.expires = expires; + if (restart->nanosleep.type != TT_NONE) + error = nanosleep_copyout(restart, &it.it_value); } return error; @@ -1347,10 +1328,9 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags, static long posix_cpu_nsleep_restart(struct restart_block *restart_block); static int posix_cpu_nsleep(const clockid_t which_clock, int flags, - struct timespec *rqtp, struct timespec __user *rmtp) + const struct timespec64 *rqtp) { struct restart_block *restart_block = &current->restart_block; - struct itimerspec it; int error; /* @@ -1358,25 +1338,18 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, */ if (CPUCLOCK_PERTHREAD(which_clock) && (CPUCLOCK_PID(which_clock) == 0 || - CPUCLOCK_PID(which_clock) == current->pid)) + CPUCLOCK_PID(which_clock) == task_pid_vnr(current))) return -EINVAL; - error = do_cpu_nanosleep(which_clock, flags, rqtp, &it); + error = do_cpu_nanosleep(which_clock, flags, rqtp); if (error == -ERESTART_RESTARTBLOCK) { if (flags & TIMER_ABSTIME) return -ERESTARTNOHAND; - /* - * Report back to the user the time still remaining. 
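
do_cpu_nanosleep() now parks the remaining time in the restart block and leaves the user copy-out to nanosleep_copyout(), which picks the native or compat layout from restart->nanosleep.type. The userspace contract is unchanged; a sketch (note that a CPU-time clock only advances while the process consumes CPU, so in practice some thread must stay busy for the sleep to finish):

    #include <errno.h>
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        struct timespec req = { .tv_sec = 2, .tv_nsec = 0 };
        struct timespec rem;
        int err;

        /* Relative sleep on this process' CPU clock; on a signal,
         * rem reports the CPU time still to be consumed. */
        err = clock_nanosleep(CLOCK_PROCESS_CPUTIME_ID, 0, &req, &rem);
        if (err == EINTR)
            printf("remaining: %ld.%09ld\n",
                   (long)rem.tv_sec, rem.tv_nsec);
        return err;
    }
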
- */ - if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) - return -EFAULT; restart_block->fn = posix_cpu_nsleep_restart; restart_block->nanosleep.clockid = which_clock; - restart_block->nanosleep.rmtp = rmtp; - restart_block->nanosleep.expires = timespec_to_ns(rqtp); } return error; } @@ -1384,38 +1357,23 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, static long posix_cpu_nsleep_restart(struct restart_block *restart_block) { clockid_t which_clock = restart_block->nanosleep.clockid; - struct timespec t; - struct itimerspec it; - int error; - - t = ns_to_timespec(restart_block->nanosleep.expires); + struct timespec64 t; - error = do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t, &it); - - if (error == -ERESTART_RESTARTBLOCK) { - struct timespec __user *rmtp = restart_block->nanosleep.rmtp; - /* - * Report back to the user the time still remaining. - */ - if (rmtp && copy_to_user(rmtp, &it.it_value, sizeof *rmtp)) - return -EFAULT; - - restart_block->nanosleep.expires = timespec_to_ns(&t); - } - return error; + t = ns_to_timespec64(restart_block->nanosleep.expires); + return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t); } #define PROCESS_CLOCK MAKE_PROCESS_CPUCLOCK(0, CPUCLOCK_SCHED) #define THREAD_CLOCK MAKE_THREAD_CPUCLOCK(0, CPUCLOCK_SCHED) static int process_cpu_clock_getres(const clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { return posix_cpu_clock_getres(PROCESS_CLOCK, tp); } static int process_cpu_clock_get(const clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { return posix_cpu_clock_get(PROCESS_CLOCK, tp); } @@ -1425,22 +1383,17 @@ static int process_cpu_timer_create(struct k_itimer *timer) return posix_cpu_timer_create(timer); } static int process_cpu_nsleep(const clockid_t which_clock, int flags, - struct timespec *rqtp, - struct timespec __user *rmtp) + const struct timespec64 *rqtp) { - return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp, rmtp); -} -static long process_cpu_nsleep_restart(struct restart_block *restart_block) -{ - return -EINVAL; + return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp); } static int thread_cpu_clock_getres(const clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { return posix_cpu_clock_getres(THREAD_CLOCK, tp); } static int thread_cpu_clock_get(const clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { return posix_cpu_clock_get(THREAD_CLOCK, tp); } @@ -1450,41 +1403,27 @@ static int thread_cpu_timer_create(struct k_itimer *timer) return posix_cpu_timer_create(timer); } -struct k_clock clock_posix_cpu = { +const struct k_clock clock_posix_cpu = { .clock_getres = posix_cpu_clock_getres, .clock_set = posix_cpu_clock_set, .clock_get = posix_cpu_clock_get, .timer_create = posix_cpu_timer_create, .nsleep = posix_cpu_nsleep, - .nsleep_restart = posix_cpu_nsleep_restart, .timer_set = posix_cpu_timer_set, .timer_del = posix_cpu_timer_del, .timer_get = posix_cpu_timer_get, + .timer_rearm = posix_cpu_timer_rearm, }; -static __init int init_posix_cpu_timers(void) -{ - struct k_clock process = { - .clock_getres = process_cpu_clock_getres, - .clock_get = process_cpu_clock_get, - .timer_create = process_cpu_timer_create, - .nsleep = process_cpu_nsleep, - .nsleep_restart = process_cpu_nsleep_restart, - }; - struct k_clock thread = { - .clock_getres = thread_cpu_clock_getres, - .clock_get = thread_cpu_clock_get, - .timer_create = thread_cpu_timer_create, - }; - struct timespec ts; - - posix_timers_register_clock(CLOCK_PROCESS_CPUTIME_ID, 
&process); - posix_timers_register_clock(CLOCK_THREAD_CPUTIME_ID, &thread); - - cputime_to_timespec(cputime_one_jiffy, &ts); - onecputick = ts.tv_nsec; - WARN_ON(ts.tv_sec != 0); +const struct k_clock clock_process = { + .clock_getres = process_cpu_clock_getres, + .clock_get = process_cpu_clock_get, + .timer_create = process_cpu_timer_create, + .nsleep = process_cpu_nsleep, +}; - return 0; -} -__initcall(init_posix_cpu_timers); +const struct k_clock clock_thread = { + .clock_getres = thread_cpu_clock_getres, + .clock_get = thread_cpu_clock_get, + .timer_create = thread_cpu_timer_create, +}; diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index cd6716e115e8..06f34feb635e 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -17,6 +17,7 @@ #include <linux/ktime.h> #include <linux/timekeeping.h> #include <linux/posix-timers.h> +#include <linux/compat.h> asmlinkage long sys_ni_posix_timers(void) { @@ -27,6 +28,7 @@ asmlinkage long sys_ni_posix_timers(void) } #define SYS_NI(name) SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers) +#define COMPAT_SYS_NI(name) SYSCALL_ALIAS(compat_sys_##name, sys_ni_posix_timers) SYS_NI(timer_create); SYS_NI(timer_gettime); @@ -49,34 +51,52 @@ SYS_NI(alarm); SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, const struct timespec __user *, tp) { - struct timespec new_tp; + struct timespec64 new_tp; if (which_clock != CLOCK_REALTIME) return -EINVAL; - if (copy_from_user(&new_tp, tp, sizeof (*tp))) + if (get_timespec64(&new_tp, tp)) return -EFAULT; - return do_sys_settimeofday(&new_tp, NULL); + + return do_sys_settimeofday64(&new_tp, NULL); } -SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, - struct timespec __user *,tp) +int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp) { - struct timespec kernel_tp; - switch (which_clock) { - case CLOCK_REALTIME: ktime_get_real_ts(&kernel_tp); break; - case CLOCK_MONOTONIC: ktime_get_ts(&kernel_tp); break; - case CLOCK_BOOTTIME: get_monotonic_boottime(&kernel_tp); break; - default: return -EINVAL; + case CLOCK_REALTIME: + ktime_get_real_ts64(tp); + break; + case CLOCK_MONOTONIC: + ktime_get_ts64(tp); + break; + case CLOCK_BOOTTIME: + get_monotonic_boottime64(tp); + break; + default: + return -EINVAL; } - if (copy_to_user(tp, &kernel_tp, sizeof (kernel_tp))) + + return 0; +} +SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, + struct timespec __user *, tp) +{ + int ret; + struct timespec64 kernel_tp; + + ret = do_clock_gettime(which_clock, &kernel_tp); + if (ret) + return ret; + + if (put_timespec64(&kernel_tp, tp)) return -EFAULT; return 0; } SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct timespec __user *, tp) { - struct timespec rtn_tp = { + struct timespec64 rtn_tp = { .tv_sec = 0, .tv_nsec = hrtimer_resolution, }; @@ -85,7 +105,7 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct timespec __us case CLOCK_REALTIME: case CLOCK_MONOTONIC: case CLOCK_BOOTTIME: - if (copy_to_user(tp, &rtn_tp, sizeof(rtn_tp))) + if (put_timespec64(&rtn_tp, tp)) return -EFAULT; return 0; default: @@ -97,27 +117,115 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, const struct timespec __user *, rqtp, struct timespec __user *, rmtp) { + struct timespec64 t64; struct timespec t; switch (which_clock) { case CLOCK_REALTIME: case CLOCK_MONOTONIC: case CLOCK_BOOTTIME: - if (copy_from_user(&t, rqtp, sizeof (struct timespec))) - return -EFAULT; - if (!timespec_valid(&t)) - return -EINVAL; - return 
hrtimer_nanosleep(&t, rmtp, flags & TIMER_ABSTIME ? - HRTIMER_MODE_ABS : HRTIMER_MODE_REL, - which_clock); + break; default: return -EINVAL; } + + if (copy_from_user(&t, rqtp, sizeof (struct timespec))) + return -EFAULT; + t64 = timespec_to_timespec64(t); + if (!timespec64_valid(&t64)) + return -EINVAL; + if (flags & TIMER_ABSTIME) + rmtp = NULL; + current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; + current->restart_block.nanosleep.rmtp = rmtp; + return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ? + HRTIMER_MODE_ABS : HRTIMER_MODE_REL, + which_clock); } #ifdef CONFIG_COMPAT -long clock_nanosleep_restart(struct restart_block *restart_block) +COMPAT_SYS_NI(timer_create); +COMPAT_SYS_NI(clock_adjtime); +COMPAT_SYS_NI(timer_settime); +COMPAT_SYS_NI(timer_gettime); +COMPAT_SYS_NI(getitimer); +COMPAT_SYS_NI(setitimer); + +COMPAT_SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, + struct compat_timespec __user *, tp) +{ + struct timespec64 new_tp; + + if (which_clock != CLOCK_REALTIME) + return -EINVAL; + if (compat_get_timespec64(&new_tp, tp)) + return -EFAULT; + + return do_sys_settimeofday64(&new_tp, NULL); +} + +COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, + struct compat_timespec __user *, tp) { - return hrtimer_nanosleep_restart(restart_block); + int ret; + struct timespec64 kernel_tp; + + ret = do_clock_gettime(which_clock, &kernel_tp); + if (ret) + return ret; + + if (compat_put_timespec64(&kernel_tp, tp)) + return -EFAULT; + return 0; +} + +COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, + struct compat_timespec __user *, tp) +{ + struct timespec64 rtn_tp = { + .tv_sec = 0, + .tv_nsec = hrtimer_resolution, + }; + + switch (which_clock) { + case CLOCK_REALTIME: + case CLOCK_MONOTONIC: + case CLOCK_BOOTTIME: + if (compat_put_timespec64(&rtn_tp, tp)) + return -EFAULT; + return 0; + default: + return -EINVAL; + } +} + +COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, + struct compat_timespec __user *, rqtp, + struct compat_timespec __user *, rmtp) +{ + struct timespec64 t64; + struct timespec t; + + switch (which_clock) { + case CLOCK_REALTIME: + case CLOCK_MONOTONIC: + case CLOCK_BOOTTIME: + break; + default: + return -EINVAL; + } + + if (compat_get_timespec(&t, rqtp)) + return -EFAULT; + t64 = timespec_to_timespec64(t); + if (!timespec64_valid(&t64)) + return -EINVAL; + if (flags & TIMER_ABSTIME) + rmtp = NULL; + current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; + current->restart_block.nanosleep.compat_rmtp = rmtp; + return hrtimer_nanosleep(&t64, flags & TIMER_ABSTIME ? + HRTIMER_MODE_ABS : HRTIMER_MODE_REL, + which_clock); } #endif diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 1e6623d76750..13d6881f908b 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -35,6 +35,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/mutex.h> +#include <linux/sched/task.h> #include <linux/uaccess.h> #include <linux/list.h> @@ -48,8 +49,10 @@ #include <linux/workqueue.h> #include <linux/export.h> #include <linux/hashtable.h> +#include <linux/compat.h> #include "timekeeping.h" +#include "posix-timers.h" /* * Management arrays for POSIX timers. 
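
Both stub paths above record where (and in which layout) a remaining-time result must land before calling hrtimer_nanosleep(), and deliberately drop rmtp for TIMER_ABSTIME sleeps, as POSIX requires. For reference, an absolute sleep from userspace, using only standard API:

    #include <time.h>

    int main(void)
    {
        struct timespec deadline;

        clock_gettime(CLOCK_MONOTONIC, &deadline);
        deadline.tv_sec += 1;

        /* With TIMER_ABSTIME no remaining time is reported: after a
         * signal we simply re-sleep towards the same deadline. */
        while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME,
                               &deadline, NULL))
            ;
        return 0;
    }
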
Timers are now kept in static hash table @@ -68,6 +71,10 @@ static struct kmem_cache *posix_timers_cache; static DEFINE_HASHTABLE(posix_timers_hashtable, 9); static DEFINE_SPINLOCK(hash_lock); +static const struct k_clock * const posix_clocks[]; +static const struct k_clock *clockid_to_kclock(const clockid_t id); +static const struct k_clock clock_realtime, clock_monotonic; + /* * we assume that the new SIGEV_THREAD_ID shares no bits with the other * SIGEV values. Here we put out an error if this assumption fails. @@ -123,22 +130,6 @@ static DEFINE_SPINLOCK(hash_lock); * have is CLOCK_REALTIME and its high res counter part, both of * which we beg off on and pass to do_sys_settimeofday(). */ - -static struct k_clock posix_clocks[MAX_CLOCKS]; - -/* - * These ones are defined below. - */ -static int common_nsleep(const clockid_t, int flags, struct timespec *t, - struct timespec __user *rmtp); -static int common_timer_create(struct k_itimer *new_timer); -static void common_timer_get(struct k_itimer *, struct itimerspec *); -static int common_timer_set(struct k_itimer *, int, - struct itimerspec *, struct itimerspec *); -static int common_timer_del(struct k_itimer *timer); - -static enum hrtimer_restart posix_timer_fn(struct hrtimer *data); - static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags); #define lock_timer(tid, flags) \ @@ -203,17 +194,17 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) } /* Get clock_realtime */ -static int posix_clock_realtime_get(clockid_t which_clock, struct timespec *tp) +static int posix_clock_realtime_get(clockid_t which_clock, struct timespec64 *tp) { - ktime_get_real_ts(tp); + ktime_get_real_ts64(tp); return 0; } /* Set clock_realtime */ static int posix_clock_realtime_set(const clockid_t which_clock, - const struct timespec *tp) + const struct timespec64 *tp) { - return do_sys_settimeofday(tp, NULL); + return do_sys_settimeofday64(tp, NULL); } static int posix_clock_realtime_adj(const clockid_t which_clock, @@ -225,54 +216,54 @@ static int posix_clock_realtime_adj(const clockid_t which_clock, /* * Get monotonic time for posix timers */ -static int posix_ktime_get_ts(clockid_t which_clock, struct timespec *tp) +static int posix_ktime_get_ts(clockid_t which_clock, struct timespec64 *tp) { - ktime_get_ts(tp); + ktime_get_ts64(tp); return 0; } /* * Get monotonic-raw time for posix timers */ -static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) +static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) { - getrawmonotonic(tp); + getrawmonotonic64(tp); return 0; } -static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp) +static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp) { - *tp = current_kernel_time(); + *tp = current_kernel_time64(); return 0; } static int posix_get_monotonic_coarse(clockid_t which_clock, - struct timespec *tp) + struct timespec64 *tp) { - *tp = get_monotonic_coarse(); + *tp = get_monotonic_coarse64(); return 0; } -static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp) +static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *tp) { - *tp = ktime_to_timespec(KTIME_LOW_RES); + *tp = ktime_to_timespec64(KTIME_LOW_RES); return 0; } -static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp) +static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp) { - get_monotonic_boottime(tp); + 
get_monotonic_boottime64(tp); return 0; } -static int posix_get_tai(clockid_t which_clock, struct timespec *tp) +static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp) { - timekeeping_clocktai(tp); + timekeeping_clocktai64(tp); return 0; } -static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec *tp) +static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp) { tp->tv_sec = 0; tp->tv_nsec = hrtimer_resolution; @@ -284,91 +275,23 @@ static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec *tp) */ static __init int init_posix_timers(void) { - struct k_clock clock_realtime = { - .clock_getres = posix_get_hrtimer_res, - .clock_get = posix_clock_realtime_get, - .clock_set = posix_clock_realtime_set, - .clock_adj = posix_clock_realtime_adj, - .nsleep = common_nsleep, - .nsleep_restart = hrtimer_nanosleep_restart, - .timer_create = common_timer_create, - .timer_set = common_timer_set, - .timer_get = common_timer_get, - .timer_del = common_timer_del, - }; - struct k_clock clock_monotonic = { - .clock_getres = posix_get_hrtimer_res, - .clock_get = posix_ktime_get_ts, - .nsleep = common_nsleep, - .nsleep_restart = hrtimer_nanosleep_restart, - .timer_create = common_timer_create, - .timer_set = common_timer_set, - .timer_get = common_timer_get, - .timer_del = common_timer_del, - }; - struct k_clock clock_monotonic_raw = { - .clock_getres = posix_get_hrtimer_res, - .clock_get = posix_get_monotonic_raw, - }; - struct k_clock clock_realtime_coarse = { - .clock_getres = posix_get_coarse_res, - .clock_get = posix_get_realtime_coarse, - }; - struct k_clock clock_monotonic_coarse = { - .clock_getres = posix_get_coarse_res, - .clock_get = posix_get_monotonic_coarse, - }; - struct k_clock clock_tai = { - .clock_getres = posix_get_hrtimer_res, - .clock_get = posix_get_tai, - .nsleep = common_nsleep, - .nsleep_restart = hrtimer_nanosleep_restart, - .timer_create = common_timer_create, - .timer_set = common_timer_set, - .timer_get = common_timer_get, - .timer_del = common_timer_del, - }; - struct k_clock clock_boottime = { - .clock_getres = posix_get_hrtimer_res, - .clock_get = posix_get_boottime, - .nsleep = common_nsleep, - .nsleep_restart = hrtimer_nanosleep_restart, - .timer_create = common_timer_create, - .timer_set = common_timer_set, - .timer_get = common_timer_get, - .timer_del = common_timer_del, - }; - - posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime); - posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic); - posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); - posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); - posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); - posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime); - posix_timers_register_clock(CLOCK_TAI, &clock_tai); - posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof (struct k_itimer), 0, SLAB_PANIC, NULL); return 0; } - __initcall(init_posix_timers); -static void schedule_next_timer(struct k_itimer *timr) +static void common_hrtimer_rearm(struct k_itimer *timr) { struct hrtimer *timer = &timr->it.real.timer; - if (timr->it.real.interval == 0) + if (!timr->it_interval) return; timr->it_overrun += (unsigned int) hrtimer_forward(timer, timer->base->get_time(), - timr->it.real.interval); - - timr->it_overrun_last = timr->it_overrun; - timr->it_overrun = -1; - ++timr->it_requeue_pending; + timr->it_interval); hrtimer_restart(timer); } @@ -383,24 +306,27 @@ static 
void schedule_next_timer(struct k_itimer *timr) * To protect against the timer going away while the interrupt is queued, * we require that the it_requeue_pending flag be set. */ -void do_schedule_next_timer(struct siginfo *info) +void posixtimer_rearm(struct siginfo *info) { struct k_itimer *timr; unsigned long flags; timr = lock_timer(info->si_tid, &flags); + if (!timr) + return; - if (timr && timr->it_requeue_pending == info->si_sys_private) { - if (timr->it_clock < 0) - posix_cpu_timer_schedule(timr); - else - schedule_next_timer(timr); + if (timr->it_requeue_pending == info->si_sys_private) { + timr->kclock->timer_rearm(timr); + + timr->it_active = 1; + timr->it_overrun_last = timr->it_overrun; + timr->it_overrun = -1; + ++timr->it_requeue_pending; info->si_overrun += timr->it_overrun_last; } - if (timr) - unlock_timer(timr, flags); + unlock_timer(timr, flags); } int posix_timer_event(struct k_itimer *timr, int si_private) @@ -409,12 +335,12 @@ int posix_timer_event(struct k_itimer *timr, int si_private) int shared, ret = -1; /* * FIXME: if ->sigq is queued we can race with - * dequeue_signal()->do_schedule_next_timer(). + * dequeue_signal()->posixtimer_rearm(). * * If dequeue_signal() sees the "right" value of - * si_sys_private it calls do_schedule_next_timer(). + * si_sys_private it calls posixtimer_rearm(). * We re-queue ->sigq and drop ->it_lock(). - * do_schedule_next_timer() locks the timer + * posixtimer_rearm() locks the timer * and re-schedules it while ->sigq is pending. * Not really bad, but not that we want. */ @@ -430,7 +356,6 @@ int posix_timer_event(struct k_itimer *timr, int si_private) /* If we failed to send the signal the timer stops. */ return ret > 0; } -EXPORT_SYMBOL_GPL(posix_timer_event); /* * This function gets called when a POSIX.1b interval timer expires. It @@ -449,7 +374,8 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) timr = container_of(timer, struct k_itimer, it.real.timer); spin_lock_irqsave(&timr->it_lock, flags); - if (timr->it.real.interval != 0) + timr->it_active = 0; + if (timr->it_interval != 0) si_private = ++timr->it_requeue_pending; if (posix_timer_event(timr, si_private)) { @@ -458,7 +384,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) * we will not get a call back to restart it AND * it should be restarted. 
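
posixtimer_rearm() now owns the it_overrun_last/it_requeue_pending bookkeeping that schedule_next_timer() and posix_cpu_timer_schedule() used to duplicate, and dispatches through the clock's timer_rearm() callback. The overrun count it freezes is what timer_getoverrun() reports; a userspace sketch (standard API):

    #include <signal.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    int main(void)
    {
        struct sigevent sev = {
            .sigev_notify = SIGEV_SIGNAL,
            .sigev_signo  = SIGALRM,
        };
        struct itimerspec its = {
            .it_value    = { 0, 1000000 },  /* first expiry: 1ms */
            .it_interval = { 0, 1000000 },  /* period: 1ms */
        };
        sigset_t set;
        timer_t tid;
        int sig;

        sigemptyset(&set);
        sigaddset(&set, SIGALRM);
        sigprocmask(SIG_BLOCK, &set, NULL);

        timer_create(CLOCK_MONOTONIC, &sev, &tid);
        timer_settime(tid, 0, &its, NULL);

        sleep(1);               /* many periods expire while blocked */
        sigwait(&set, &sig);    /* dequeue the single queued signal */

        /* expirations beyond the queued signal appear as overruns */
        printf("overruns: %d\n", timer_getoverrun(tid));
        return 0;
    }
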
*/ - if (timr->it.real.interval != 0) { + if (timr->it_interval != 0) { ktime_t now = hrtimer_cb_get_time(timer); /* @@ -487,15 +413,16 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) { ktime_t kj = NSEC_PER_SEC / HZ; - if (timr->it.real.interval < kj) + if (timr->it_interval < kj) now = ktime_add(now, kj); } #endif timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, - timr->it.real.interval); + timr->it_interval); ret = HRTIMER_RESTART; ++timr->it_requeue_pending; + timr->it_active = 1; } } @@ -520,30 +447,6 @@ static struct pid *good_sigevent(sigevent_t * event) return task_pid(rtn); } -void posix_timers_register_clock(const clockid_t clock_id, - struct k_clock *new_clock) -{ - if ((unsigned) clock_id >= MAX_CLOCKS) { - printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n", - clock_id); - return; - } - - if (!new_clock->clock_get) { - printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n", - clock_id); - return; - } - if (!new_clock->clock_getres) { - printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n", - clock_id); - return; - } - - posix_clocks[clock_id] = *new_clock; -} -EXPORT_SYMBOL_GPL(posix_timers_register_clock); - static struct k_itimer * alloc_posix_timer(void) { struct k_itimer *tmr; @@ -580,17 +483,6 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) call_rcu(&tmr->it.rcu, k_itimer_rcu_free); } -static struct k_clock *clockid_to_kclock(const clockid_t id) -{ - if (id < 0) - return (id & CLOCKFD_MASK) == CLOCKFD ? - &clock_posix_dynamic : &clock_posix_cpu; - - if (id >= MAX_CLOCKS || !posix_clocks[id].clock_getres) - return NULL; - return &posix_clocks[id]; -} - static int common_timer_create(struct k_itimer *new_timer) { hrtimer_init(&new_timer->it.real.timer, new_timer->it_clock, 0); @@ -598,15 +490,12 @@ static int common_timer_create(struct k_itimer *new_timer) } /* Create a POSIX.1b interval timer. 
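
The timer_create() path below is split into do_timer_create() so the new compat entry point can share it; with a NULL sigevent the timer defaults to SIGEV_SIGNAL, SIGALRM and si_value.sival_int carrying the timer id. Seen from userspace:

    #include <time.h>
    #include <unistd.h>

    int main(void)
    {
        timer_t tid;
        struct itimerspec its = { .it_value = { .tv_sec = 1 } };

        /* NULL sigevent: the kernel fills in SIGEV_SIGNAL + SIGALRM
         * and stamps si_value.sival_int with the timer id. */
        timer_create(CLOCK_REALTIME, NULL, &tid);
        timer_settime(tid, 0, &its, NULL);

        pause();   /* default SIGALRM disposition ends the process */
        return 0;
    }
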
*/ - -SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, - struct sigevent __user *, timer_event_spec, - timer_t __user *, created_timer_id) +static int do_timer_create(clockid_t which_clock, struct sigevent *event, + timer_t __user *created_timer_id) { - struct k_clock *kc = clockid_to_kclock(which_clock); + const struct k_clock *kc = clockid_to_kclock(which_clock); struct k_itimer *new_timer; int error, new_timer_id; - sigevent_t event; int it_id_set = IT_ID_NOT_SET; if (!kc) @@ -628,31 +517,28 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, it_id_set = IT_ID_SET; new_timer->it_id = (timer_t) new_timer_id; new_timer->it_clock = which_clock; + new_timer->kclock = kc; new_timer->it_overrun = -1; - if (timer_event_spec) { - if (copy_from_user(&event, timer_event_spec, sizeof (event))) { - error = -EFAULT; - goto out; - } + if (event) { rcu_read_lock(); - new_timer->it_pid = get_pid(good_sigevent(&event)); + new_timer->it_pid = get_pid(good_sigevent(event)); rcu_read_unlock(); if (!new_timer->it_pid) { error = -EINVAL; goto out; } + new_timer->it_sigev_notify = event->sigev_notify; + new_timer->sigq->info.si_signo = event->sigev_signo; + new_timer->sigq->info.si_value = event->sigev_value; } else { - memset(&event.sigev_value, 0, sizeof(event.sigev_value)); - event.sigev_notify = SIGEV_SIGNAL; - event.sigev_signo = SIGALRM; - event.sigev_value.sival_int = new_timer->it_id; + new_timer->it_sigev_notify = SIGEV_SIGNAL; + new_timer->sigq->info.si_signo = SIGALRM; + memset(&new_timer->sigq->info.si_value, 0, sizeof(sigval_t)); + new_timer->sigq->info.si_value.sival_int = new_timer->it_id; new_timer->it_pid = get_pid(task_tgid(current)); } - new_timer->it_sigev_notify = event.sigev_notify; - new_timer->sigq->info.si_signo = event.sigev_signo; - new_timer->sigq->info.si_value = event.sigev_value; new_timer->sigq->info.si_tid = new_timer->it_id; new_timer->sigq->info.si_code = SI_TIMER; @@ -683,6 +569,36 @@ out: return error; } +SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, + struct sigevent __user *, timer_event_spec, + timer_t __user *, created_timer_id) +{ + if (timer_event_spec) { + sigevent_t event; + + if (copy_from_user(&event, timer_event_spec, sizeof (event))) + return -EFAULT; + return do_timer_create(which_clock, &event, created_timer_id); + } + return do_timer_create(which_clock, NULL, created_timer_id); +} + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock, + struct compat_sigevent __user *, timer_event_spec, + timer_t __user *, created_timer_id) +{ + if (timer_event_spec) { + sigevent_t event; + + if (get_compat_sigevent(&event, timer_event_spec)) + return -EFAULT; + return do_timer_create(which_clock, &event, created_timer_id); + } + return do_timer_create(which_clock, NULL, created_timer_id); +} +#endif + /* * Locking issues: We need to protect the result of the id look up until * we get the timer locked down so it is not deleted under us. The @@ -716,6 +632,20 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) return NULL; } +static ktime_t common_hrtimer_remaining(struct k_itimer *timr, ktime_t now) +{ + struct hrtimer *timer = &timr->it.real.timer; + + return __hrtimer_expires_remaining_adjusted(timer, now); +} + +static int common_hrtimer_forward(struct k_itimer *timr, ktime_t now) +{ + struct hrtimer *timer = &timr->it.real.timer; + + return (int)hrtimer_forward(timer, now, timr->it_interval); +} + /* * Get the time remaining on a POSIX.1b interval timer. 
This function * is ALWAYS called with spin_lock_irq on the timer, thus it must not @@ -732,54 +662,61 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) * it is the same as a requeue pending timer WRT to what we should * report. */ -static void -common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) +void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting) { + const struct k_clock *kc = timr->kclock; ktime_t now, remaining, iv; - struct hrtimer *timer = &timr->it.real.timer; + struct timespec64 ts64; + bool sig_none; - memset(cur_setting, 0, sizeof(struct itimerspec)); - - iv = timr->it.real.interval; + sig_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; + iv = timr->it_interval; /* interval timer ? */ - if (iv) - cur_setting->it_interval = ktime_to_timespec(iv); - else if (!hrtimer_active(timer) && - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) - return; + if (iv) { + cur_setting->it_interval = ktime_to_timespec64(iv); + } else if (!timr->it_active) { + /* + * SIGEV_NONE oneshot timers are never queued. Check them + * below. + */ + if (!sig_none) + return; + } - now = timer->base->get_time(); + /* + * The timespec64 based conversion is suboptimal, but it's not + * worth to implement yet another callback. + */ + kc->clock_get(timr->it_clock, &ts64); + now = timespec64_to_ktime(ts64); /* - * When a requeue is pending or this is a SIGEV_NONE - * timer move the expiry time forward by intervals, so - * expiry is > now. + * When a requeue is pending or this is a SIGEV_NONE timer move the + * expiry time forward by intervals, so expiry is > now. */ - if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || - (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) - timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv); + if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none)) + timr->it_overrun += kc->timer_forward(timr, now); - remaining = __hrtimer_expires_remaining_adjusted(timer, now); + remaining = kc->timer_remaining(timr, now); /* Return 0 only, when the timer is expired and not pending */ if (remaining <= 0) { /* * A single shot SIGEV_NONE timer must return 0, when * it is expired ! */ - if ((timr->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) + if (!sig_none) cur_setting->it_value.tv_nsec = 1; - } else - cur_setting->it_value = ktime_to_timespec(remaining); + } else { + cur_setting->it_value = ktime_to_timespec64(remaining); + } } /* Get the time remaining on a POSIX.1b interval timer. */ -SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, - struct itimerspec __user *, setting) +static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting) { - struct itimerspec cur_setting; struct k_itimer *timr; - struct k_clock *kc; + const struct k_clock *kc; unsigned long flags; int ret = 0; @@ -787,19 +724,45 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, if (!timr) return -EINVAL; - kc = clockid_to_kclock(timr->it_clock); + memset(setting, 0, sizeof(*setting)); + kc = timr->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_get)) ret = -EINVAL; else - kc->timer_get(timr, &cur_setting); + kc->timer_get(timr, setting); unlock_timer(timr, flags); + return ret; +} - if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting))) - return -EFAULT; +/* Get the time remaining on a POSIX.1b interval timer. 
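
common_timer_get() above takes "now" from the clock's own clock_get() callback and the remaining time from the new timer_remaining() hook, so non-hrtimer clocks (CPU clocks, alarmtimers) can reuse it. Its output is exactly what timer_gettime() returns; for example:

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
        timer_t tid;
        struct itimerspec its = {
            .it_value    = { .tv_sec = 5 },
            .it_interval = { .tv_sec = 1 },
        };
        struct itimerspec cur;

        timer_create(CLOCK_MONOTONIC, NULL, &tid);
        timer_settime(tid, 0, &its, NULL);

        /* it_value: time to next expiry, it_interval: the period.
         * An expired but still pending timer reports 1ns, not 0. */
        timer_gettime(tid, &cur);
        printf("next expiry in %ld.%09lds\n",
               (long)cur.it_value.tv_sec, cur.it_value.tv_nsec);
        return 0;
    }
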
*/ +SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, + struct itimerspec __user *, setting) +{ + struct itimerspec64 cur_setting; + + int ret = do_timer_gettime(timer_id, &cur_setting); + if (!ret) { + if (put_itimerspec64(&cur_setting, setting)) + ret = -EFAULT; + } + return ret; +} + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, + struct compat_itimerspec __user *, setting) +{ + struct itimerspec64 cur_setting; + int ret = do_timer_gettime(timer_id, &cur_setting); + if (!ret) { + if (put_compat_itimerspec64(&cur_setting, setting)) + ret = -EFAULT; + } return ret; } +#endif /* * Get the number of overruns of a POSIX.1b interval timer. This is to @@ -807,7 +770,7 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, * accumulating overruns on the next timer. The overrun is frozen when * the signal is delivered, either at the notify time (if the info block * is not queued) or at the actual delivery time (as we are informed by - * the call back to do_schedule_next_timer(). So all we need to do is + * the call back to posixtimer_rearm(). So all we need to do is * to pick up the frozen overrun. */ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) @@ -826,114 +789,175 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) return overrun; } -/* Set a POSIX.1b interval timer. */ -/* timr->it_lock is taken. */ -static int -common_timer_set(struct k_itimer *timr, int flags, - struct itimerspec *new_setting, struct itimerspec *old_setting) +static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires, + bool absolute, bool sigev_none) { struct hrtimer *timer = &timr->it.real.timer; enum hrtimer_mode mode; + mode = absolute ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL; + /* + * Posix magic: Relative CLOCK_REALTIME timers are not affected by + * clock modifications, so they become CLOCK_MONOTONIC based under the + * hood. See hrtimer_init(). Update timr->kclock, so the generic + * functions which use timr->kclock->clock_get() work. + * + * Note: it_clock stays unmodified, because the next timer_set() might + * use ABSTIME, so it needs to switch back. + */ + if (timr->it_clock == CLOCK_REALTIME) + timr->kclock = absolute ? &clock_realtime : &clock_monotonic; + + hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); + timr->it.real.timer.function = posix_timer_fn; + + if (!absolute) + expires = ktime_add_safe(expires, timer->base->get_time()); + hrtimer_set_expires(timer, expires); + + if (!sigev_none) + hrtimer_start_expires(timer, HRTIMER_MODE_ABS); +} + +static int common_hrtimer_try_to_cancel(struct k_itimer *timr) +{ + return hrtimer_try_to_cancel(&timr->it.real.timer); +} + +/* Set a POSIX.1b interval timer. */ +int common_timer_set(struct k_itimer *timr, int flags, + struct itimerspec64 *new_setting, + struct itimerspec64 *old_setting) +{ + const struct k_clock *kc = timr->kclock; + bool sigev_none; + ktime_t expires; + if (old_setting) common_timer_get(timr, old_setting); - /* disable the timer */ - timr->it.real.interval = 0; + /* Prevent rearming by clearing the interval */ + timr->it_interval = 0; /* - * careful here. If smp we could be in the "fire" routine which will - * be spinning as we hold the lock. But this is ONLY an SMP issue. + * Careful here. On SMP systems the timer expiry function could be + * active and spinning on timr->it_lock. 
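
The "Posix magic" in common_hrtimer_arm() above means a CLOCK_REALTIME timer silently runs on a monotonic base when armed relatively, and on the realtime base when armed with TIMER_ABSTIME. The user-visible difference, in standard API:

    #include <time.h>

    int main(void)
    {
        timer_t tid;
        struct timespec now;
        struct itimerspec its = { .it_value = { .tv_sec = 60 } };

        timer_create(CLOCK_REALTIME, NULL, &tid);

        /* Relative: fires after 60s of elapsed time, unaffected by
         * clock_settime() stepping CLOCK_REALTIME meanwhile. */
        timer_settime(tid, 0, &its, NULL);

        /* Absolute: fires when CLOCK_REALTIME reaches the deadline;
         * stepping the clock moves the expiry accordingly. */
        clock_gettime(CLOCK_REALTIME, &now);
        its.it_value.tv_sec = now.tv_sec + 60;
        timer_settime(tid, TIMER_ABSTIME, &its, NULL);

        return 0;
    }
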
*/ - if (hrtimer_try_to_cancel(timer) < 0) + if (kc->timer_try_to_cancel(timr) < 0) return TIMER_RETRY; - timr->it_requeue_pending = (timr->it_requeue_pending + 2) & + timr->it_active = 0; + timr->it_requeue_pending = (timr->it_requeue_pending + 2) & ~REQUEUE_PENDING; timr->it_overrun_last = 0; - /* switch off the timer when it_value is zero */ + /* Switch off the timer when it_value is zero */ if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) return 0; - mode = flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL; - hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); - timr->it.real.timer.function = posix_timer_fn; - - hrtimer_set_expires(timer, timespec_to_ktime(new_setting->it_value)); - - /* Convert interval */ - timr->it.real.interval = timespec_to_ktime(new_setting->it_interval); - - /* SIGEV_NONE timers are not queued ! See common_timer_get */ - if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) { - /* Setup correct expiry time for relative timers */ - if (mode == HRTIMER_MODE_REL) { - hrtimer_add_expires(timer, timer->base->get_time()); - } - return 0; - } + timr->it_interval = timespec64_to_ktime(new_setting->it_interval); + expires = timespec64_to_ktime(new_setting->it_value); + sigev_none = (timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE; - hrtimer_start_expires(timer, mode); + kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none); + timr->it_active = !sigev_none; return 0; } -/* Set a POSIX.1b interval timer */ -SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, - const struct itimerspec __user *, new_setting, - struct itimerspec __user *, old_setting) +static int do_timer_settime(timer_t timer_id, int flags, + struct itimerspec64 *new_spec64, + struct itimerspec64 *old_spec64) { + const struct k_clock *kc; struct k_itimer *timr; - struct itimerspec new_spec, old_spec; - int error = 0; unsigned long flag; - struct itimerspec *rtn = old_setting ? &old_spec : NULL; - struct k_clock *kc; + int error = 0; - if (!new_setting) + if (!timespec64_valid(&new_spec64->it_interval) || + !timespec64_valid(&new_spec64->it_value)) return -EINVAL; - if (copy_from_user(&new_spec, new_setting, sizeof (new_spec))) - return -EFAULT; - - if (!timespec_valid(&new_spec.it_interval) || - !timespec_valid(&new_spec.it_value)) - return -EINVAL; + if (old_spec64) + memset(old_spec64, 0, sizeof(*old_spec64)); retry: timr = lock_timer(timer_id, &flag); if (!timr) return -EINVAL; - kc = clockid_to_kclock(timr->it_clock); + kc = timr->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_set)) error = -EINVAL; else - error = kc->timer_set(timr, flags, &new_spec, rtn); + error = kc->timer_set(timr, flags, new_spec64, old_spec64); unlock_timer(timr, flag); if (error == TIMER_RETRY) { - rtn = NULL; // We already got the old time... + old_spec64 = NULL; // We already got the old time... goto retry; } - if (old_setting && !error && - copy_to_user(old_setting, &old_spec, sizeof (old_spec))) - error = -EFAULT; + return error; +} + +/* Set a POSIX.1b interval timer */ +SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, + const struct itimerspec __user *, new_setting, + struct itimerspec __user *, old_setting) +{ + struct itimerspec64 new_spec, old_spec; + struct itimerspec64 *rtn = old_setting ? 
&old_spec : NULL; + int error = 0; + + if (!new_setting) + return -EINVAL; + if (get_itimerspec64(&new_spec, new_setting)) + return -EFAULT; + + error = do_timer_settime(timer_id, flags, &new_spec, rtn); + if (!error && old_setting) { + if (put_itimerspec64(&old_spec, old_setting)) + error = -EFAULT; + } + return error; +} + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, + struct compat_itimerspec __user *, new, + struct compat_itimerspec __user *, old) +{ + struct itimerspec64 new_spec, old_spec; + struct itimerspec64 *rtn = old ? &old_spec : NULL; + int error = 0; + + if (!new) + return -EINVAL; + if (get_compat_itimerspec64(&new_spec, new)) + return -EFAULT; + + error = do_timer_settime(timer_id, flags, &new_spec, rtn); + if (!error && old) { + if (put_compat_itimerspec64(&old_spec, old)) + error = -EFAULT; + } return error; } +#endif -static int common_timer_del(struct k_itimer *timer) +int common_timer_del(struct k_itimer *timer) { - timer->it.real.interval = 0; + const struct k_clock *kc = timer->kclock; - if (hrtimer_try_to_cancel(&timer->it.real.timer) < 0) + timer->it_interval = 0; + if (kc->timer_try_to_cancel(timer) < 0) return TIMER_RETRY; + timer->it_active = 0; return 0; } static inline int timer_delete_hook(struct k_itimer *timer) { - struct k_clock *kc = clockid_to_kclock(timer->it_clock); + const struct k_clock *kc = timer->kclock; if (WARN_ON_ONCE(!kc || !kc->timer_del)) return -EINVAL; @@ -1012,13 +1036,13 @@ void exit_itimers(struct signal_struct *sig) SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, const struct timespec __user *, tp) { - struct k_clock *kc = clockid_to_kclock(which_clock); - struct timespec new_tp; + const struct k_clock *kc = clockid_to_kclock(which_clock); + struct timespec64 new_tp; if (!kc || !kc->clock_set) return -EINVAL; - if (copy_from_user(&new_tp, tp, sizeof (*tp))) + if (get_timespec64(&new_tp, tp)) return -EFAULT; return kc->clock_set(which_clock, &new_tp); @@ -1027,8 +1051,8 @@ SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, struct timespec __user *,tp) { - struct k_clock *kc = clockid_to_kclock(which_clock); - struct timespec kernel_tp; + const struct k_clock *kc = clockid_to_kclock(which_clock); + struct timespec64 kernel_tp; int error; if (!kc) @@ -1036,7 +1060,7 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, error = kc->clock_get(which_clock, &kernel_tp); - if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp))) + if (!error && put_timespec64(&kernel_tp, tp)) error = -EFAULT; return error; @@ -1045,7 +1069,7 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, struct timex __user *, utx) { - struct k_clock *kc = clockid_to_kclock(which_clock); + const struct k_clock *kc = clockid_to_kclock(which_clock); struct timex ktx; int err; @@ -1068,8 +1092,8 @@ SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, struct timespec __user *, tp) { - struct k_clock *kc = clockid_to_kclock(which_clock); - struct timespec rtn_tp; + const struct k_clock *kc = clockid_to_kclock(which_clock); + struct timespec64 rtn_tp; int error; if (!kc) @@ -1077,19 +1101,97 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, error = kc->clock_getres(which_clock, &rtn_tp); - if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp))) + if (!error && tp && 
put_timespec64(&rtn_tp, tp)) error = -EFAULT; return error; } +#ifdef CONFIG_COMPAT + +COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock, + struct compat_timespec __user *, tp) +{ + const struct k_clock *kc = clockid_to_kclock(which_clock); + struct timespec64 ts; + + if (!kc || !kc->clock_set) + return -EINVAL; + + if (compat_get_timespec64(&ts, tp)) + return -EFAULT; + + return kc->clock_set(which_clock, &ts); +} + +COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock, + struct compat_timespec __user *, tp) +{ + const struct k_clock *kc = clockid_to_kclock(which_clock); + struct timespec64 ts; + int err; + + if (!kc) + return -EINVAL; + + err = kc->clock_get(which_clock, &ts); + + if (!err && compat_put_timespec64(&ts, tp)) + err = -EFAULT; + + return err; +} + +COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock, + struct compat_timex __user *, utp) +{ + const struct k_clock *kc = clockid_to_kclock(which_clock); + struct timex ktx; + int err; + + if (!kc) + return -EINVAL; + if (!kc->clock_adj) + return -EOPNOTSUPP; + + err = compat_get_timex(&ktx, utp); + if (err) + return err; + + err = kc->clock_adj(which_clock, &ktx); + + if (err >= 0) + err = compat_put_timex(utp, &ktx); + + return err; +} + +COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock, + struct compat_timespec __user *, tp) +{ + const struct k_clock *kc = clockid_to_kclock(which_clock); + struct timespec64 ts; + int err; + + if (!kc) + return -EINVAL; + + err = kc->clock_getres(which_clock, &ts); + if (!err && tp && compat_put_timespec64(&ts, tp)) + return -EFAULT; + + return err; +} + +#endif + /* * nanosleep for monotonic and realtime clocks */ static int common_nsleep(const clockid_t which_clock, int flags, - struct timespec *tsave, struct timespec __user *rmtp) + const struct timespec64 *rqtp) { - return hrtimer_nanosleep(tsave, rmtp, flags & TIMER_ABSTIME ? + return hrtimer_nanosleep(rqtp, flags & TIMER_ABSTIME ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL, which_clock); } @@ -1098,34 +1200,152 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, const struct timespec __user *, rqtp, struct timespec __user *, rmtp) { - struct k_clock *kc = clockid_to_kclock(which_clock); - struct timespec t; + const struct k_clock *kc = clockid_to_kclock(which_clock); + struct timespec64 t; if (!kc) return -EINVAL; if (!kc->nsleep) return -ENANOSLEEP_NOTSUP; - if (copy_from_user(&t, rqtp, sizeof (struct timespec))) + if (get_timespec64(&t, rqtp)) return -EFAULT; - if (!timespec_valid(&t)) + if (!timespec64_valid(&t)) return -EINVAL; + if (flags & TIMER_ABSTIME) + rmtp = NULL; + current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; + current->restart_block.nanosleep.rmtp = rmtp; - return kc->nsleep(which_clock, flags, &t, rmtp); + return kc->nsleep(which_clock, flags, &t); } -/* - * This will restart clock_nanosleep. This is required only by - * compat_clock_nanosleep_restart for now. 
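
The compat clock_adjtime() wrapper above converts the timex layout in both directions; clock_adj remains optional and yields -EOPNOTSUPP when a clock lacks it. A read-only query from userspace (modes = 0 requests no adjustment):

    #include <stdio.h>
    #include <sys/timex.h>
    #include <time.h>

    int main(void)
    {
        struct timex tx = { .modes = 0 };   /* query, adjust nothing */
        int state = clock_adjtime(CLOCK_REALTIME, &tx);

        if (state < 0)
            return 1;
        /* state is TIME_OK/TIME_INS/...; tx.offset and tx.freq hold
         * the current NTP correction. */
        printf("state=%d freq=%ld\n", state, tx.freq);
        return 0;
    }
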
- */ -long clock_nanosleep_restart(struct restart_block *restart_block) +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags, + struct compat_timespec __user *, rqtp, + struct compat_timespec __user *, rmtp) { - clockid_t which_clock = restart_block->nanosleep.clockid; - struct k_clock *kc = clockid_to_kclock(which_clock); + const struct k_clock *kc = clockid_to_kclock(which_clock); + struct timespec64 t; + + if (!kc) + return -EINVAL; + if (!kc->nsleep) + return -ENANOSLEEP_NOTSUP; - if (WARN_ON_ONCE(!kc || !kc->nsleep_restart)) + if (compat_get_timespec64(&t, rqtp)) + return -EFAULT; + + if (!timespec64_valid(&t)) return -EINVAL; + if (flags & TIMER_ABSTIME) + rmtp = NULL; + current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; + current->restart_block.nanosleep.compat_rmtp = rmtp; + + return kc->nsleep(which_clock, flags, &t); +} +#endif - return kc->nsleep_restart(restart_block); +static const struct k_clock clock_realtime = { + .clock_getres = posix_get_hrtimer_res, + .clock_get = posix_clock_realtime_get, + .clock_set = posix_clock_realtime_set, + .clock_adj = posix_clock_realtime_adj, + .nsleep = common_nsleep, + .timer_create = common_timer_create, + .timer_set = common_timer_set, + .timer_get = common_timer_get, + .timer_del = common_timer_del, + .timer_rearm = common_hrtimer_rearm, + .timer_forward = common_hrtimer_forward, + .timer_remaining = common_hrtimer_remaining, + .timer_try_to_cancel = common_hrtimer_try_to_cancel, + .timer_arm = common_hrtimer_arm, +}; + +static const struct k_clock clock_monotonic = { + .clock_getres = posix_get_hrtimer_res, + .clock_get = posix_ktime_get_ts, + .nsleep = common_nsleep, + .timer_create = common_timer_create, + .timer_set = common_timer_set, + .timer_get = common_timer_get, + .timer_del = common_timer_del, + .timer_rearm = common_hrtimer_rearm, + .timer_forward = common_hrtimer_forward, + .timer_remaining = common_hrtimer_remaining, + .timer_try_to_cancel = common_hrtimer_try_to_cancel, + .timer_arm = common_hrtimer_arm, +}; + +static const struct k_clock clock_monotonic_raw = { + .clock_getres = posix_get_hrtimer_res, + .clock_get = posix_get_monotonic_raw, +}; + +static const struct k_clock clock_realtime_coarse = { + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_realtime_coarse, +}; + +static const struct k_clock clock_monotonic_coarse = { + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_monotonic_coarse, +}; + +static const struct k_clock clock_tai = { + .clock_getres = posix_get_hrtimer_res, + .clock_get = posix_get_tai, + .nsleep = common_nsleep, + .timer_create = common_timer_create, + .timer_set = common_timer_set, + .timer_get = common_timer_get, + .timer_del = common_timer_del, + .timer_rearm = common_hrtimer_rearm, + .timer_forward = common_hrtimer_forward, + .timer_remaining = common_hrtimer_remaining, + .timer_try_to_cancel = common_hrtimer_try_to_cancel, + .timer_arm = common_hrtimer_arm, +}; + +static const struct k_clock clock_boottime = { + .clock_getres = posix_get_hrtimer_res, + .clock_get = posix_get_boottime, + .nsleep = common_nsleep, + .timer_create = common_timer_create, + .timer_set = common_timer_set, + .timer_get = common_timer_get, + .timer_del = common_timer_del, + .timer_rearm = common_hrtimer_rearm, + .timer_forward = common_hrtimer_forward, + .timer_remaining = common_hrtimer_remaining, + .timer_try_to_cancel = common_hrtimer_try_to_cancel, + .timer_arm = common_hrtimer_arm, +}; + +static const struct k_clock * 
const posix_clocks[] = { + [CLOCK_REALTIME] = &clock_realtime, + [CLOCK_MONOTONIC] = &clock_monotonic, + [CLOCK_PROCESS_CPUTIME_ID] = &clock_process, + [CLOCK_THREAD_CPUTIME_ID] = &clock_thread, + [CLOCK_MONOTONIC_RAW] = &clock_monotonic_raw, + [CLOCK_REALTIME_COARSE] = &clock_realtime_coarse, + [CLOCK_MONOTONIC_COARSE] = &clock_monotonic_coarse, + [CLOCK_BOOTTIME] = &clock_boottime, + [CLOCK_REALTIME_ALARM] = &alarm_clock, + [CLOCK_BOOTTIME_ALARM] = &alarm_clock, + [CLOCK_TAI] = &clock_tai, +}; + +static const struct k_clock *clockid_to_kclock(const clockid_t id) +{ + if (id < 0) + return (id & CLOCKFD_MASK) == CLOCKFD ? + &clock_posix_dynamic : &clock_posix_cpu; + + if (id >= ARRAY_SIZE(posix_clocks) || !posix_clocks[id]) + return NULL; + return posix_clocks[id]; } diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h new file mode 100644 index 000000000000..fb303c3be4d3 --- /dev/null +++ b/kernel/time/posix-timers.h @@ -0,0 +1,40 @@ +#define TIMER_RETRY 1 + +struct k_clock { + int (*clock_getres)(const clockid_t which_clock, + struct timespec64 *tp); + int (*clock_set)(const clockid_t which_clock, + const struct timespec64 *tp); + int (*clock_get)(const clockid_t which_clock, + struct timespec64 *tp); + int (*clock_adj)(const clockid_t which_clock, struct timex *tx); + int (*timer_create)(struct k_itimer *timer); + int (*nsleep)(const clockid_t which_clock, int flags, + const struct timespec64 *); + int (*timer_set)(struct k_itimer *timr, int flags, + struct itimerspec64 *new_setting, + struct itimerspec64 *old_setting); + int (*timer_del)(struct k_itimer *timr); + void (*timer_get)(struct k_itimer *timr, + struct itimerspec64 *cur_setting); + void (*timer_rearm)(struct k_itimer *timr); + int (*timer_forward)(struct k_itimer *timr, ktime_t now); + ktime_t (*timer_remaining)(struct k_itimer *timr, ktime_t now); + int (*timer_try_to_cancel)(struct k_itimer *timr); + void (*timer_arm)(struct k_itimer *timr, ktime_t expires, + bool absolute, bool sigev_none); +}; + +extern const struct k_clock clock_posix_cpu; +extern const struct k_clock clock_posix_dynamic; +extern const struct k_clock clock_process; +extern const struct k_clock clock_thread; +extern const struct k_clock alarm_clock; + +int posix_timer_event(struct k_itimer *timr, int si_private); + +void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting); +int common_timer_set(struct k_itimer *timr, int flags, + struct itimerspec64 *new_setting, + struct itimerspec64 *old_setting); +int common_timer_del(struct k_itimer *timer); diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index a26036d37a38..2d8f05aad442 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/moduleparam.h> #include <linux/sched.h> +#include <linux/sched/clock.h> #include <linux/syscore_ops.h> #include <linux/hrtimer.h> #include <linux/sched_clock.h> @@ -205,6 +206,11 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) update_clock_read_data(&rd); + if (sched_clock_timer.function != NULL) { + /* update timeout for clock wrap */ + hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL); + } + r = rate; if (r >= 4000000) { r /= 1000000; diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 3109204c87cc..b398c2ea69b2 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -29,16 +29,19 @@ */ static struct tick_device tick_broadcast_device; -static cpumask_var_t 
tick_broadcast_mask; -static cpumask_var_t tick_broadcast_on; -static cpumask_var_t tmpmask; -static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); +static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly; +static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly; +static cpumask_var_t tmpmask __cpumask_var_read_mostly; static int tick_broadcast_forced; +static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock); + #ifdef CONFIG_TICK_ONESHOT +static void tick_broadcast_setup_oneshot(struct clock_event_device *bc); static void tick_broadcast_clear_oneshot(int cpu); static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); #else +static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_clear_oneshot(int cpu) { } static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { } #endif @@ -347,17 +350,16 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) * * Called when the system enters a state where affected tick devices * might stop. Note: TICK_BROADCAST_FORCE cannot be undone. - * - * Called with interrupts disabled, so clockevents_lock is not - * required here because the local clock event device cannot go away - * under us. */ void tick_broadcast_control(enum tick_broadcast_mode mode) { struct clock_event_device *bc, *dev; struct tick_device *td; int cpu, bc_stopped; + unsigned long flags; + /* Protects also the local clockevent device. */ + raw_spin_lock_irqsave(&tick_broadcast_lock, flags); td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; @@ -365,12 +367,11 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) * Is the device not affected by the powerstate ? */ if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP)) - return; + goto out; if (!tick_device_is_functional(dev)) - return; + goto out; - raw_spin_lock(&tick_broadcast_lock); cpu = smp_processor_id(); bc = tick_broadcast_device.evtdev; bc_stopped = cpumask_empty(tick_broadcast_mask); @@ -420,7 +421,8 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) tick_broadcast_setup_oneshot(bc); } } - raw_spin_unlock(&tick_broadcast_lock); +out: + raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); } EXPORT_SYMBOL_GPL(tick_broadcast_control); @@ -517,9 +519,9 @@ void tick_resume_broadcast(void) #ifdef CONFIG_TICK_ONESHOT -static cpumask_var_t tick_broadcast_oneshot_mask; -static cpumask_var_t tick_broadcast_pending_mask; -static cpumask_var_t tick_broadcast_force_mask; +static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly; +static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly; +static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly; /* * Exposed for debugging: see timer_list.c @@ -867,7 +869,7 @@ static void tick_broadcast_init_next_event(struct cpumask *mask, /** * tick_broadcast_setup_oneshot - setup the broadcast device */ -void tick_broadcast_setup_oneshot(struct clock_event_device *bc) +static void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { int cpu = smp_processor_id(); diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index f738251000fe..be0ac01f2e12 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -126,7 +126,6 @@ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; } /* Functions related to oneshot broadcasting */ #if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) -extern 
void tick_broadcast_setup_oneshot(struct clock_event_device *bc); extern void tick_broadcast_switch_to_oneshot(void); extern void tick_shutdown_broadcast_oneshot(unsigned int cpu); extern int tick_broadcast_oneshot_active(void); @@ -134,7 +133,6 @@ extern void tick_check_oneshot_broadcast_this_cpu(void); bool tick_broadcast_oneshot_available(void); extern struct cpumask *tick_get_broadcast_oneshot_mask(void); #else /* !(BROADCAST && ONESHOT): */ -static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); } static inline void tick_broadcast_switch_to_oneshot(void) { } static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { } static inline int tick_broadcast_oneshot_active(void) { return 0; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 74e0388cc88d..c7a899c5ce64 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -17,8 +17,12 @@ #include <linux/interrupt.h> #include <linux/kernel_stat.h> #include <linux/percpu.h> +#include <linux/nmi.h> #include <linux/profile.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> +#include <linux/sched/clock.h> +#include <linux/sched/stat.h> +#include <linux/sched/nohz.h> #include <linux/module.h> #include <linux/irq_work.h> #include <linux/posix-timers.h> @@ -146,6 +150,12 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) touch_softlockup_watchdog_sched(); if (is_idle_task(current)) ts->idle_jiffies++; + /* + * In case the current tick fired too early past its expected + * expiration, make sure we don't bypass the next clock reprogramming + * to the same deadline. + */ + ts->next_tick = 0; } #endif update_process_times(user_mode(regs)); @@ -550,7 +560,7 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now) update_ts_time_stats(smp_processor_id(), ts, now, NULL); ts->idle_active = 0; - sched_clock_idle_wakeup_event(0); + sched_clock_idle_wakeup_event(); } static ktime_t tick_nohz_start_idle(struct tick_sched *ts) @@ -656,6 +666,12 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); else tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); + + /* + * Reset to make sure next tick stop doesn't get fooled by past + * cached clock deadline. + */ + ts->next_tick = 0; } static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, @@ -697,8 +713,6 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, */ delta = next_tick - basemono; if (delta <= (u64)TICK_NSEC) { - tick = 0; - /* * Tell the timer code that the base is not idle, i.e. undo * the effect of get_next_timer_interrupt(): @@ -708,23 +722,8 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, * We've not stopped the tick yet, and there's a timer in the * next period, so no point in stopping it either, bail. */ - if (!ts->tick_stopped) - goto out; - - /* - * If, OTOH, we did stop it, but there's a pending (expired) - * timer reprogram the timer hardware to fire now. - * - * We will not restart the tick proper, just prod the timer - * hardware into firing an interrupt to process the pending - * timers. Just like tick_irq_exit() will not restart the tick - * for 'normal' interrupts. - * - * Only once we exit the idle loop will we re-enable the tick, - * see tick_nohz_idle_exit(). 
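
The common thread in these tick-sched changes is ts->next_tick: a cached copy of the deadline last handed to the clockevent, used to skip redundant reprogramming, and zeroed at every point where that cache can go stale (the tick handler ran early, the CPU went offline, the tick got restarted). A condensed sketch of the pattern; hw_set_deadline() and the struct are hypothetical stand-ins, not kernel API:

    /* Hypothetical condensation of the ts->next_tick caching idea. */
    struct tick_state {
        unsigned long long next_tick;   /* last deadline programmed */
        int tick_stopped;
    };

    static void hw_set_deadline(unsigned long long t) { (void)t; }

    static void program_tick(struct tick_state *ts,
                             unsigned long long expires)
    {
        /* Skip the hardware reprogram when the deadline is unchanged */
        if (ts->tick_stopped && expires == ts->next_tick)
            return;

        hw_set_deadline(expires);   /* stand-in for tick_program_event() */
        ts->next_tick = expires;
    }

    /* Whenever the hardware may have been reprogrammed behind our
     * back, the cache is invalidated: ts->next_tick = 0; */
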
- */ - if (delta == 0) { - tick_nohz_restart(ts, now); + if (!ts->tick_stopped) { + tick = 0; goto out; } } @@ -767,8 +766,16 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, tick = expires; /* Skip reprogram of event if its not changed */ - if (ts->tick_stopped && (expires == ts->next_tick)) - goto out; + if (ts->tick_stopped && (expires == ts->next_tick)) { + /* Sanity check: make sure clockevent is actually programmed */ + if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer)) + goto out; + + WARN_ON_ONCE(1); + printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n", + basemono, ts->next_tick, dev->next_event, + hrtimer_active(&ts->sched_timer), hrtimer_get_expires(&ts->sched_timer)); + } /* * nohz_stop_sched_tick can be called several times before @@ -778,8 +785,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, * the scheduler tick in nohz_restart_sched_tick. */ if (!ts->tick_stopped) { - nohz_balance_enter_idle(cpu); - calc_load_enter_idle(); + calc_load_nohz_start(); cpu_load_update_nohz_start(); ts->last_tick = hrtimer_get_expires(&ts->sched_timer); @@ -799,8 +805,10 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, goto out; } + hrtimer_set_expires(&ts->sched_timer, tick); + if (ts->nohz_mode == NOHZ_MODE_HIGHRES) - hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED); + hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); else tick_program_event(tick, 1); out: @@ -824,7 +832,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) */ timer_clear_idle(); - calc_load_exit_idle(); + calc_load_nohz_stop(); touch_softlockup_watchdog_sched(); /* * Cancel the scheduled timer and restore the tick @@ -865,6 +873,11 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts) if (unlikely(!cpu_online(cpu))) { if (cpu == tick_do_timer_cpu) tick_do_timer_cpu = TICK_DO_TIMER_NONE; + /* + * Make sure the CPU doesn't get fooled by obsolete tick + * deadline if it comes back online later. + */ + ts->next_tick = 0; return false; } @@ -924,8 +937,10 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts) ts->idle_expires = expires; } - if (!was_stopped && ts->tick_stopped) + if (!was_stopped && ts->tick_stopped) { ts->idle_jiffies = ts->last_jiffies; + nohz_balance_enter_idle(cpu); + } } } @@ -994,6 +1009,18 @@ ktime_t tick_nohz_get_sleep_length(void) return ts->sleep_length; } +/** + * tick_nohz_get_idle_calls - return the current idle calls counter value + * + * Called from the schedutil frequency scaling governor in scheduler context. 
+ */ +unsigned long tick_nohz_get_idle_calls(void) +{ + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); + + return ts->idle_calls; +} + static void tick_nohz_account_idle_ticks(struct tick_sched *ts) { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE @@ -1161,6 +1188,8 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) */ if (regs) tick_sched_handle(ts, regs); + else + ts->next_tick = 0; /* No need to reprogram if we are in idle or full dynticks mode */ if (unlikely(ts->tick_stopped)) diff --git a/kernel/time/time.c b/kernel/time/time.c index a3a9a8a029dc..44a8c1402133 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -39,6 +39,7 @@ #include <linux/ptrace.h> #include <linux/uaccess.h> +#include <linux/compat.h> #include <asm/unistd.h> #include <generated/timeconst.h> @@ -99,6 +100,47 @@ SYSCALL_DEFINE1(stime, time_t __user *, tptr) #endif /* __ARCH_WANT_SYS_TIME */ +#ifdef CONFIG_COMPAT +#ifdef __ARCH_WANT_COMPAT_SYS_TIME + +/* compat_time_t is a 32 bit "long" and needs to get converted. */ +COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc) +{ + struct timeval tv; + compat_time_t i; + + do_gettimeofday(&tv); + i = tv.tv_sec; + + if (tloc) { + if (put_user(i,tloc)) + return -EFAULT; + } + force_successful_syscall_return(); + return i; +} + +COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr) +{ + struct timespec tv; + int err; + + if (get_user(tv.tv_sec, tptr)) + return -EFAULT; + + tv.tv_nsec = 0; + + err = security_settime(&tv, NULL); + if (err) + return err; + + do_settimeofday(&tv); + return 0; +} + +#endif /* __ARCH_WANT_COMPAT_SYS_TIME */ +#endif + SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, struct timezone __user *, tz) { @@ -193,8 +235,8 @@ int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv, struct timezone __user *, tz) { + struct timespec64 new_ts; struct timeval user_tv; - struct timespec new_ts; struct timezone new_tz; if (tv) { @@ -212,9 +254,50 @@ SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv, return -EFAULT; } - return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); + return do_sys_settimeofday64(tv ? &new_ts : NULL, tz ? &new_tz : NULL); } +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE2(gettimeofday, struct compat_timeval __user *, tv, + struct timezone __user *, tz) +{ + if (tv) { + struct timeval ktv; + + do_gettimeofday(&ktv); + if (compat_put_timeval(&ktv, tv)) + return -EFAULT; + } + if (tz) { + if (copy_to_user(tz, &sys_tz, sizeof(sys_tz))) + return -EFAULT; + } + + return 0; +} + +COMPAT_SYSCALL_DEFINE2(settimeofday, struct compat_timeval __user *, tv, + struct timezone __user *, tz) +{ + struct timespec64 new_ts; + struct timeval user_tv; + struct timezone new_tz; + + if (tv) { + if (compat_get_timeval(&user_tv, tv)) + return -EFAULT; + new_ts.tv_sec = user_tv.tv_sec; + new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC; + } + if (tz) { + if (copy_from_user(&new_tz, tz, sizeof(*tz))) + return -EFAULT; + } + + return do_sys_settimeofday64(tv ? &new_ts : NULL, tz ? &new_tz : NULL); +} +#endif + SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) { struct timex txc; /* Local copy of parameter */ @@ -224,25 +307,32 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) * structure. 
But bear in mind that the structures * may change */ - if(copy_from_user(&txc, txc_p, sizeof(struct timex))) + if (copy_from_user(&txc, txc_p, sizeof(struct timex))) return -EFAULT; ret = do_adjtimex(&txc); return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret; } -/** - * current_fs_time - Return FS time - * @sb: Superblock. - * - * Return the current time truncated to the time granularity supported by - * the fs. - */ -struct timespec current_fs_time(struct super_block *sb) +#ifdef CONFIG_COMPAT + +COMPAT_SYSCALL_DEFINE1(adjtimex, struct compat_timex __user *, utp) { - struct timespec now = current_kernel_time(); - return timespec_trunc(now, sb->s_time_gran); + struct timex txc; + int err, ret; + + err = compat_get_timex(&txc, utp); + if (err) + return err; + + ret = do_adjtimex(&txc); + + err = compat_put_timex(utp, &txc); + if (err) + return err; + + return ret; } -EXPORT_SYMBOL(current_fs_time); +#endif /* * Convert jiffies to milliseconds and back. @@ -702,6 +792,16 @@ u64 nsec_to_clock_t(u64 x) #endif } +u64 jiffies64_to_nsecs(u64 j) +{ +#if !(NSEC_PER_SEC % HZ) + return (NSEC_PER_SEC / HZ) * j; +# else + return div_u64(j * HZ_TO_NSEC_NUM, HZ_TO_NSEC_DEN); +#endif +} +EXPORT_SYMBOL(jiffies64_to_nsecs); + /** * nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64 * @@ -790,3 +890,61 @@ struct timespec64 timespec64_add_safe(const struct timespec64 lhs, return res; } + +int get_timespec64(struct timespec64 *ts, + const struct timespec __user *uts) +{ + struct timespec kts; + int ret; + + ret = copy_from_user(&kts, uts, sizeof(kts)); + if (ret) + return -EFAULT; + + ts->tv_sec = kts.tv_sec; + ts->tv_nsec = kts.tv_nsec; + + return 0; +} +EXPORT_SYMBOL_GPL(get_timespec64); + +int put_timespec64(const struct timespec64 *ts, + struct timespec __user *uts) +{ + struct timespec kts = { + .tv_sec = ts->tv_sec, + .tv_nsec = ts->tv_nsec + }; + return copy_to_user(uts, &kts, sizeof(kts)) ? 
-EFAULT : 0; +} +EXPORT_SYMBOL_GPL(put_timespec64); + +int get_itimerspec64(struct itimerspec64 *it, + const struct itimerspec __user *uit) +{ + int ret; + + ret = get_timespec64(&it->it_interval, &uit->it_interval); + if (ret) + return ret; + + ret = get_timespec64(&it->it_value, &uit->it_value); + + return ret; +} +EXPORT_SYMBOL_GPL(get_itimerspec64); + +int put_itimerspec64(const struct itimerspec64 *it, + struct itimerspec __user *uit) +{ + int ret; + + ret = put_timespec64(&it->it_interval, &uit->it_interval); + if (ret) + return ret; + + ret = put_timespec64(&it->it_value, &uit->it_value); + + return ret; +} +EXPORT_SYMBOL_GPL(put_itimerspec64); diff --git a/kernel/time/timeconst.bc b/kernel/time/timeconst.bc index c48688904f9f..f83bbb81600b 100644 --- a/kernel/time/timeconst.bc +++ b/kernel/time/timeconst.bc @@ -98,6 +98,12 @@ define timeconst(hz) { print "#define HZ_TO_USEC_DEN\t\t", hz/cd, "\n" print "#define USEC_TO_HZ_NUM\t\t", hz/cd, "\n" print "#define USEC_TO_HZ_DEN\t\t", 1000000/cd, "\n" + + cd=gcd(hz,1000000000) + print "#define HZ_TO_NSEC_NUM\t\t", 1000000000/cd, "\n" + print "#define HZ_TO_NSEC_DEN\t\t", hz/cd, "\n" + print "#define NSEC_TO_HZ_NUM\t\t", hz/cd, "\n" + print "#define NSEC_TO_HZ_DEN\t\t", 1000000000/cd, "\n" print "\n" print "#endif /* KERNEL_TIMECONST_H */\n" diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index db087d7e106d..cedafa008de5 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -14,7 +14,9 @@ #include <linux/percpu.h> #include <linux/init.h> #include <linux/mm.h> +#include <linux/nmi.h> #include <linux/sched.h> +#include <linux/sched/loadavg.h> #include <linux/syscore_ops.h> #include <linux/clocksource.h> #include <linux/jiffies.h> @@ -70,6 +72,10 @@ static inline void tk_normalize_xtime(struct timekeeper *tk) tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift; tk->xtime_sec++; } + while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) { + tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift; + tk->raw_sec++; + } } static inline struct timespec64 tk_xtime(struct timekeeper *tk) @@ -116,6 +122,26 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta) tk->offs_boot = ktime_add(tk->offs_boot, delta); } +/* + * tk_clock_read - atomic clocksource read() helper + * + * This helper is necessary to use in the read paths because, while the + * seqlock ensures we don't return a bad value while structures are updated, + * it doesn't protect from potential crashes. There is the possibility that + * the tkr's clocksource may change between the read reference, and the + * clock reference passed to the read function. This can cause crashes if + * the wrong clocksource is passed to the wrong read function. + * This isn't necessary to use when holding the timekeeper_lock or doing + * a read of the fast-timekeeper tkrs (which is protected by its own locking + * and update logic). 
+ */ +static inline u64 tk_clock_read(struct tk_read_base *tkr) +{ + struct clocksource *clock = READ_ONCE(tkr->clock); + + return clock->read(clock); +} + #ifdef CONFIG_DEBUG_TIMEKEEPING #define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ @@ -173,7 +199,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr) */ do { seq = read_seqcount_begin(&tk_core.seq); - now = tkr->read(tkr->clock); + now = tk_clock_read(tkr); last = tkr->cycle_last; mask = tkr->mask; max = tkr->clock->max_cycles; @@ -207,7 +233,7 @@ static inline u64 timekeeping_get_delta(struct tk_read_base *tkr) u64 cycle_now, delta; /* read clocksource */ - cycle_now = tkr->read(tkr->clock); + cycle_now = tk_clock_read(tkr); /* calculate the delta since the last update_wall_time */ delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask); @@ -236,12 +262,10 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) ++tk->cs_was_changed_seq; old_clock = tk->tkr_mono.clock; tk->tkr_mono.clock = clock; - tk->tkr_mono.read = clock->read; tk->tkr_mono.mask = clock->mask; - tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock); + tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono); tk->tkr_raw.clock = clock; - tk->tkr_raw.read = clock->read; tk->tkr_raw.mask = clock->mask; tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last; @@ -260,17 +284,19 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock) /* Go back from cycles -> shifted ns */ tk->xtime_interval = interval * clock->mult; tk->xtime_remainder = ntpinterval - tk->xtime_interval; - tk->raw_interval = (interval * clock->mult) >> clock->shift; + tk->raw_interval = interval * clock->mult; /* if changing clocks, convert xtime_nsec shift units */ if (old_clock) { int shift_change = clock->shift - old_clock->shift; - if (shift_change < 0) + if (shift_change < 0) { tk->tkr_mono.xtime_nsec >>= -shift_change; - else + tk->tkr_raw.xtime_nsec >>= -shift_change; + } else { tk->tkr_mono.xtime_nsec <<= shift_change; + tk->tkr_raw.xtime_nsec <<= shift_change; + } } - tk->tkr_raw.xtime_nsec = 0; tk->tkr_mono.shift = clock->shift; tk->tkr_raw.shift = clock->shift; @@ -402,7 +428,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) now += timekeeping_delta_to_ns(tkr, clocksource_delta( - tkr->read(tkr->clock), + tk_clock_read(tkr), tkr->cycle_last, tkr->mask)); } while (read_seqcount_retry(&tkf->seq, seq)); @@ -459,6 +485,10 @@ static u64 dummy_clock_read(struct clocksource *cs) return cycles_at_suspend; } +static struct clocksource dummy_clock = { + .read = dummy_clock_read, +}; + /** * halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource. * @tk: Timekeeper to snapshot. @@ -475,17 +505,18 @@ static void halt_fast_timekeeper(struct timekeeper *tk) struct tk_read_base *tkr = &tk->tkr_mono; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); - cycles_at_suspend = tkr->read(tkr->clock); - tkr_dummy.read = dummy_clock_read; + cycles_at_suspend = tk_clock_read(tkr); + tkr_dummy.clock = &dummy_clock; update_fast_timekeeper(&tkr_dummy, &tk_fast_mono); tkr = &tk->tkr_raw; memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy)); - tkr_dummy.read = dummy_clock_read; + tkr_dummy.clock = &dummy_clock; update_fast_timekeeper(&tkr_dummy, &tk_fast_raw); } #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD +#warning Please contact your maintainers, as GENERIC_TIME_VSYSCALL_OLD compatibility will disappear soon.
static inline void update_vsyscall(struct timekeeper *tk) { @@ -595,9 +626,6 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) nsec = (u32) tk->wall_to_monotonic.tv_nsec; tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); - /* Update the monotonic raw base */ - tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time); - /* * The sum of the nanoseconds portions of xtime and * wall_to_monotonic can be greater/equal one second. Take @@ -607,6 +635,11 @@ static inline void tk_update_ktime_data(struct timekeeper *tk) if (nsec >= NSEC_PER_SEC) seconds++; tk->ktime_sec = seconds; + + /* Update the monotonic raw base */ + seconds = tk->raw_sec; + nsec = (u32)(tk->tkr_raw.xtime_nsec >> tk->tkr_raw.shift); + tk->tkr_raw.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec); } /* must hold timekeeper_lock */ @@ -647,11 +680,9 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) */ static void timekeeping_forward_now(struct timekeeper *tk) { - struct clocksource *clock = tk->tkr_mono.clock; u64 cycle_now, delta; - u64 nsec; - cycle_now = tk->tkr_mono.read(clock); + cycle_now = tk_clock_read(&tk->tkr_mono); delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask); tk->tkr_mono.cycle_last = cycle_now; tk->tkr_raw.cycle_last = cycle_now; @@ -661,10 +692,13 @@ static void timekeeping_forward_now(struct timekeeper *tk) /* If arch requires, add in get_arch_timeoffset() */ tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift; - tk_normalize_xtime(tk); - nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift); - timespec64_add_ns(&tk->raw_time, nsec); + tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult; + + /* If arch requires, add in get_arch_timeoffset() */ + tk->tkr_raw.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_raw.shift; + + tk_normalize_xtime(tk); } /** @@ -927,8 +961,7 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) do { seq = read_seqcount_begin(&tk_core.seq); - - now = tk->tkr_mono.read(tk->tkr_mono.clock); + now = tk_clock_read(&tk->tkr_mono); systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq; systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq; base_real = ktime_add(tk->tkr_mono.base, @@ -994,8 +1027,7 @@ static int adjust_historical_crosststamp(struct system_time_snapshot *history, return 0; /* Interpolate shortest distance from beginning or end of history */ - interp_forward = partial_history_cycles > total_history_cycles/2 ? - true : false; + interp_forward = partial_history_cycles > total_history_cycles / 2; partial_history_cycles = interp_forward ? total_history_cycles - partial_history_cycles : partial_history_cycles; @@ -1107,7 +1139,7 @@ int get_device_system_crosststamp(int (*get_time_fn) * Check whether the system counter value provided by the * device driver is on the current timekeeping interval. 
*/ - now = tk->tkr_mono.read(tk->tkr_mono.clock); + now = tk_clock_read(&tk->tkr_mono); interval_start = tk->tkr_mono.cycle_last; if (!cycle_between(interval_start, cycles, now)) { clock_was_set_seq = tk->clock_was_set_seq; @@ -1275,27 +1307,8 @@ error: /* even if we error out, we forwarded the time, so call update */ } EXPORT_SYMBOL(timekeeping_inject_offset); - -/** - * timekeeping_get_tai_offset - Returns current TAI offset from UTC - * - */ -s32 timekeeping_get_tai_offset(void) -{ - struct timekeeper *tk = &tk_core.timekeeper; - unsigned int seq; - s32 ret; - - do { - seq = read_seqcount_begin(&tk_core.seq); - ret = tk->tai_offset; - } while (read_seqcount_retry(&tk_core.seq, seq)); - - return ret; -} - /** - * __timekeeping_set_tai_offset - Lock free worker function + * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic * */ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) @@ -1305,24 +1318,6 @@ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) } /** - * timekeeping_set_tai_offset - Sets the current TAI offset from UTC - * - */ -void timekeeping_set_tai_offset(s32 tai_offset) -{ - struct timekeeper *tk = &tk_core.timekeeper; - unsigned long flags; - - raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&tk_core.seq); - __timekeeping_set_tai_offset(tk, tai_offset); - timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&tk_core.seq); - raw_spin_unlock_irqrestore(&timekeeper_lock, flags); - clock_was_set(); -} - -/** * change_clocksource - Swaps clocksources if a new one is available * * Accumulates current time interval and initializes new clocksource @@ -1389,19 +1384,18 @@ int timekeeping_notify(struct clocksource *clock) void getrawmonotonic64(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; - struct timespec64 ts64; unsigned long seq; u64 nsecs; do { seq = read_seqcount_begin(&tk_core.seq); + ts->tv_sec = tk->raw_sec; nsecs = timekeeping_get_ns(&tk->tkr_raw); - ts64 = tk->raw_time; } while (read_seqcount_retry(&tk_core.seq, seq)); - timespec64_add_ns(&ts64, nsecs); - *ts = ts64; + ts->tv_nsec = 0; + timespec64_add_ns(ts, nsecs); } EXPORT_SYMBOL(getrawmonotonic64); @@ -1525,8 +1519,7 @@ void __init timekeeping_init(void) tk_setup_internals(tk, clock); tk_set_xtime(tk, &now); - tk->raw_time.tv_sec = 0; - tk->raw_time.tv_nsec = 0; + tk->raw_sec = 0; if (boot.tv_sec == 0 && boot.tv_nsec == 0) boot = tk_xtime(tk); @@ -1665,7 +1658,7 @@ void timekeeping_resume(void) * The less preferred source will only be tried if there is no better * usable source. The rtc part is handled separately in rtc core code. 
*/ - cycle_now = tk->tkr_mono.read(clock); + cycle_now = tk_clock_read(&tk->tkr_mono); if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) && cycle_now > tk->tkr_mono.cycle_last) { u64 nsec, cyc_delta; @@ -2012,7 +2005,7 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, u32 shift, unsigned int *clock_set) { u64 interval = tk->cycle_interval << shift; - u64 raw_nsecs; + u64 snsec_per_sec; /* If the offset is smaller than a shifted interval, do nothing */ if (offset < interval) @@ -2027,14 +2020,12 @@ static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset, *clock_set |= accumulate_nsecs_to_secs(tk); /* Accumulate raw time */ - raw_nsecs = (u64)tk->raw_interval << shift; - raw_nsecs += tk->raw_time.tv_nsec; - if (raw_nsecs >= NSEC_PER_SEC) { - u64 raw_secs = raw_nsecs; - raw_nsecs = do_div(raw_secs, NSEC_PER_SEC); - tk->raw_time.tv_sec += raw_secs; + tk->tkr_raw.xtime_nsec += tk->raw_interval << shift; + snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift; + while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) { + tk->tkr_raw.xtime_nsec -= snsec_per_sec; + tk->raw_sec++; } - tk->raw_time.tv_nsec = raw_nsecs; /* Accumulate error between NTP and clock interval */ tk->ntp_error += tk->ntp_tick << shift; @@ -2066,7 +2057,7 @@ void update_wall_time(void) #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET offset = real_tk->cycle_interval; #else - offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock), + offset = clocksource_delta(tk_clock_read(&tk->tkr_mono), tk->tkr_mono.cycle_last, tk->tkr_mono.mask); #endif diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 704f595ce83f..d0914676d4c5 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -11,8 +11,6 @@ extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, extern int timekeeping_valid_for_hres(void); extern u64 timekeeping_max_deferment(void); extern int timekeeping_inject_offset(struct timespec *ts); -extern s32 timekeeping_get_tai_offset(void); -extern void timekeeping_set_tai_offset(s32 tai_offset); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index ca9fb800336b..38bc4d2208e8 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -75,7 +75,7 @@ void tk_debug_account_sleep_time(struct timespec64 *t) int bin = min(fls(t->tv_sec), NUM_BINS-1); sleep_time_bin[bin]++; - pr_info("Suspended for %lld.%03lu seconds\n", (s64)t->tv_sec, - t->tv_nsec / NSEC_PER_MSEC); + printk_deferred(KERN_INFO "Suspended for %lld.%03lu seconds\n", + (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC); } diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ec33a6933eae..71ce3f4eead3 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -38,8 +38,10 @@ #include <linux/tick.h> #include <linux/kallsyms.h> #include <linux/irq_work.h> -#include <linux/sched.h> +#include <linux/sched/signal.h> #include <linux/sched/sysctl.h> +#include <linux/sched/nohz.h> +#include <linux/sched/debug.h> #include <linux/slab.h> #include <linux/compat.h> @@ -193,7 +195,7 @@ EXPORT_SYMBOL(jiffies_64); #endif struct timer_base { - spinlock_t lock; + raw_spinlock_t lock; struct timer_list *running_timer; unsigned long clk; unsigned long next_expiry; @@ -239,7 +241,7 @@ int timer_migration_handler(struct ctl_table *table, int write, int ret; mutex_lock(&mutex); - ret = proc_dointvec(table, write, buffer, lenp, ppos); + ret = proc_dointvec_minmax(table, write, buffer, 
lenp, ppos); if (!ret && write) timers_update_migration(false); mutex_unlock(&mutex); @@ -571,38 +573,6 @@ internal_add_timer(struct timer_base *base, struct timer_list *timer) trigger_dyntick_cpu(base, timer); } -#ifdef CONFIG_TIMER_STATS -void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) -{ - if (timer->start_site) - return; - - timer->start_site = addr; - memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); - timer->start_pid = current->pid; -} - -static void timer_stats_account_timer(struct timer_list *timer) -{ - void *site; - - /* - * start_site can be concurrently reset by - * timer_stats_timer_clear_start_info() - */ - site = READ_ONCE(timer->start_site); - if (likely(!site)) - return; - - timer_stats_update_stats(timer, timer->start_pid, site, - timer->function, timer->start_comm, - timer->flags); -} - -#else -static void timer_stats_account_timer(struct timer_list *timer) {} -#endif - #ifdef CONFIG_DEBUG_OBJECTS_TIMERS static struct debug_obj_descr timer_debug_descr; @@ -789,11 +759,6 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags, { timer->entry.pprev = NULL; timer->flags = flags | raw_smp_processor_id(); -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; - timer->start_pid = -1; - memset(timer->start_comm, 0, TASK_COMM_LEN); -#endif lockdep_init_map(&timer->lockdep_map, name, key, 0); } @@ -948,10 +913,10 @@ static struct timer_base *lock_timer_base(struct timer_list *timer, if (!(tf & TIMER_MIGRATING)) { base = get_timer_base(tf); - spin_lock_irqsave(&base->lock, *flags); + raw_spin_lock_irqsave(&base->lock, *flags); if (timer->flags == tf) return base; - spin_unlock_irqrestore(&base->lock, *flags); + raw_spin_unlock_irqrestore(&base->lock, *flags); } cpu_relax(); } @@ -1001,8 +966,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) base = lock_timer_base(timer, &flags); } - timer_stats_timer_set_start_info(timer); - ret = detach_if_pending(timer, base, false); if (!ret && pending_only) goto out_unlock; @@ -1023,9 +986,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) /* See the comment in lock_timer_base() */ timer->flags |= TIMER_MIGRATING; - spin_unlock(&base->lock); + raw_spin_unlock(&base->lock); base = new_base; - spin_lock(&base->lock); + raw_spin_lock(&base->lock); WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | base->cpu); } @@ -1050,7 +1013,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } out_unlock: - spin_unlock_irqrestore(&base->lock, flags); + raw_spin_unlock_irqrestore(&base->lock, flags); return ret; } @@ -1130,7 +1093,6 @@ void add_timer_on(struct timer_list *timer, int cpu) struct timer_base *new_base, *base; unsigned long flags; - timer_stats_timer_set_start_info(timer); BUG_ON(timer_pending(timer) || !timer->function); new_base = get_timer_cpu_base(timer->flags, cpu); @@ -1144,21 +1106,21 @@ void add_timer_on(struct timer_list *timer, int cpu) if (base != new_base) { timer->flags |= TIMER_MIGRATING; - spin_unlock(&base->lock); + raw_spin_unlock(&base->lock); base = new_base; - spin_lock(&base->lock); + raw_spin_lock(&base->lock); WRITE_ONCE(timer->flags, (timer->flags & ~TIMER_BASEMASK) | cpu); } debug_activate(timer, timer->expires); internal_add_timer(base, timer); - spin_unlock_irqrestore(&base->lock, flags); + raw_spin_unlock_irqrestore(&base->lock, flags); } EXPORT_SYMBOL_GPL(add_timer_on); /** - * del_timer - deactive a timer. + * del_timer - deactivate a timer. 
* @timer: the timer to be deactivated * * del_timer() deactivates a timer - this works on both active and inactive @@ -1176,11 +1138,10 @@ int del_timer(struct timer_list *timer) debug_assert_init(timer); - timer_stats_timer_clear_start_info(timer); if (timer_pending(timer)) { base = lock_timer_base(timer, &flags); ret = detach_if_pending(timer, base, true); - spin_unlock_irqrestore(&base->lock, flags); + raw_spin_unlock_irqrestore(&base->lock, flags); } return ret; @@ -1189,7 +1150,7 @@ EXPORT_SYMBOL(del_timer); /** * try_to_del_timer_sync - Try to deactivate a timer - * @timer: timer do del + * @timer: timer to delete * * This function tries to deactivate a timer. Upon successful (ret >= 0) * exit the timer is not queued and the handler is not running on any CPU. @@ -1204,11 +1165,10 @@ int try_to_del_timer_sync(struct timer_list *timer) base = lock_timer_base(timer, &flags); - if (base->running_timer != timer) { - timer_stats_timer_clear_start_info(timer); + if (base->running_timer != timer) ret = detach_if_pending(timer, base, true); - } - spin_unlock_irqrestore(&base->lock, flags); + + raw_spin_unlock_irqrestore(&base->lock, flags); return ret; } @@ -1331,7 +1291,6 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) unsigned long data; timer = hlist_entry(head->first, struct timer_list, entry); - timer_stats_account_timer(timer); base->running_timer = timer; detach_timer(timer, true); @@ -1340,13 +1299,13 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) data = timer->data; if (timer->flags & TIMER_IRQSAFE) { - spin_unlock(&base->lock); + raw_spin_unlock(&base->lock); call_timer_fn(timer, fn, data); - spin_lock(&base->lock); + raw_spin_lock(&base->lock); } else { - spin_unlock_irq(&base->lock); + raw_spin_unlock_irq(&base->lock); call_timer_fn(timer, fn, data); - spin_lock_irq(&base->lock); + raw_spin_lock_irq(&base->lock); } } } @@ -1515,7 +1474,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if (cpu_is_offline(smp_processor_id())) return expires; - spin_lock(&base->lock); + raw_spin_lock(&base->lock); nextevt = __next_timer_interrupt(base); is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); base->next_expiry = nextevt; @@ -1543,7 +1502,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if ((expires - basem) > TICK_NSEC) base->is_idle = true; } - spin_unlock(&base->lock); + raw_spin_unlock(&base->lock); return cmp_next_hrtimer_event(basem, expires); } @@ -1631,7 +1590,7 @@ static inline void __run_timers(struct timer_base *base) if (!time_after_eq(jiffies, base->clk)) return; - spin_lock_irq(&base->lock); + raw_spin_lock_irq(&base->lock); while (time_after_eq(jiffies, base->clk)) { @@ -1642,7 +1601,7 @@ static inline void __run_timers(struct timer_base *base) expire_timers(base, heads + levels); } base->running_timer = NULL; - spin_unlock_irq(&base->lock); + raw_spin_unlock_irq(&base->lock); } /* @@ -1827,16 +1786,16 @@ int timers_dead_cpu(unsigned int cpu) * The caller is globally serialized and nobody else * takes two locks at once, deadlock is not possible. 
*/ - spin_lock_irq(&new_base->lock); - spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); + raw_spin_lock_irq(&new_base->lock); + raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); BUG_ON(old_base->running_timer); for (i = 0; i < WHEEL_SIZE; i++) migrate_timer_list(new_base, old_base->vectors + i); - spin_unlock(&old_base->lock); - spin_unlock_irq(&new_base->lock); + raw_spin_unlock(&old_base->lock); + raw_spin_unlock_irq(&new_base->lock); put_cpu_ptr(&timer_bases); } return 0; @@ -1852,7 +1811,7 @@ static void __init init_timer_cpu(int cpu) for (i = 0; i < NR_BASES; i++) { base = per_cpu_ptr(&timer_bases[i], cpu); base->cpu = cpu; - spin_lock_init(&base->lock); + raw_spin_lock_init(&base->lock); base->clk = jiffies; } } @@ -1868,7 +1827,6 @@ static void __init init_timer_cpus(void) void __init init_timers(void) { init_timer_cpus(); - init_timer_stats(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index afe6cd1944fc..0e7f5428a148 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -16,6 +16,7 @@ #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/kallsyms.h> +#include <linux/nmi.h> #include <linux/uaccess.h> @@ -62,21 +63,11 @@ static void print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer, int idx, u64 now) { -#ifdef CONFIG_TIMER_STATS - char tmp[TASK_COMM_LEN + 1]; -#endif SEQ_printf(m, " #%d: ", idx); print_name_offset(m, taddr); SEQ_printf(m, ", "); print_name_offset(m, timer->function); SEQ_printf(m, ", S:%02x", timer->state); -#ifdef CONFIG_TIMER_STATS - SEQ_printf(m, ", "); - print_name_offset(m, timer->start_site); - memcpy(tmp, timer->start_comm, TASK_COMM_LEN); - tmp[TASK_COMM_LEN] = 0; - SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); -#endif SEQ_printf(m, "\n"); SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n", (unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)), @@ -96,6 +87,9 @@ print_active_timers(struct seq_file *m, struct hrtimer_clock_base *base, next_one: i = 0; + + touch_nmi_watchdog(); + raw_spin_lock_irqsave(&base->cpu_base->lock, flags); curr = timerqueue_getnext(&base->active); @@ -127,7 +121,7 @@ print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) SEQ_printf(m, " .base: %pK\n", base); SEQ_printf(m, " .index: %d\n", base->index); - SEQ_printf(m, " .resolution: %u nsecs\n", (unsigned) hrtimer_resolution); + SEQ_printf(m, " .resolution: %u nsecs\n", hrtimer_resolution); SEQ_printf(m, " .get_time: "); print_name_offset(m, base->get_time); @@ -207,6 +201,8 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) { struct clock_event_device *dev = td->evtdev; + touch_nmi_watchdog(); + SEQ_printf(m, "Tick Device: mode: %d\n", td->mode); if (cpu < 0) SEQ_printf(m, "Broadcast device\n"); diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c deleted file mode 100644 index afddded947df..000000000000 --- a/kernel/time/timer_stats.c +++ /dev/null @@ -1,425 +0,0 @@ -/* - * kernel/time/timer_stats.c - * - * Collect timer usage statistics. - * - * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar - * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <[email protected]> - * - * timer_stats is based on timer_top, a similar functionality which was part of - * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the - * Instituto Nokia de Tecnologia - INdT - Manaus. 
timer_top's design was based - * on dynamic allocation of the statistics entries and linear search based - * lookup combined with a global lock, rather than the static array, hash - * and per-CPU locking which is used by timer_stats. It was written for the - * pre hrtimer kernel code and therefore did not take hrtimers into account. - * Nevertheless it provided the base for the timer_stats implementation and - * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks - * for this effort. - * - * timer_top.c is - * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus - * Written by Daniel Petrini <[email protected]> - * timer_top.c was released under the GNU General Public License version 2 - * - * We export the addresses and counting of timer functions being called, - * the pid and cmdline from the owner process if applicable. - * - * Start/stop data collection: - * # echo [1|0] >/proc/timer_stats - * - * Display the information collected so far: - * # cat /proc/timer_stats - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/proc_fs.h> -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/sched.h> -#include <linux/seq_file.h> -#include <linux/kallsyms.h> - -#include <linux/uaccess.h> - -/* - * This is our basic unit of interest: a timer expiry event identified - * by the timer, its start/expire functions and the PID of the task that - * started the timer. We count the number of times an event happens: - */ -struct entry { - /* - * Hash list: - */ - struct entry *next; - - /* - * Hash keys: - */ - void *timer; - void *start_func; - void *expire_func; - pid_t pid; - - /* - * Number of timeout events: - */ - unsigned long count; - u32 flags; - - /* - * We save the command-line string to preserve - * this information past task exit: - */ - char comm[TASK_COMM_LEN + 1]; - -} ____cacheline_aligned_in_smp; - -/* - * Spinlock protecting the tables - not taken during lookup: - */ -static DEFINE_RAW_SPINLOCK(table_lock); - -/* - * Per-CPU lookup locks for fast hash lookup: - */ -static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock); - -/* - * Mutex to serialize state changes with show-stats activities: - */ -static DEFINE_MUTEX(show_mutex); - -/* - * Collection status, active/inactive: - */ -int __read_mostly timer_stats_active; - -/* - * Beginning/end timestamps of measurement: - */ -static ktime_t time_start, time_stop; - -/* - * tstat entry structs only get allocated while collection is - * active and never freed during that time - this simplifies - * things quite a bit. - * - * They get freed when a new collection period is started. 
- */ -#define MAX_ENTRIES_BITS 10 -#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS) - -static unsigned long nr_entries; -static struct entry entries[MAX_ENTRIES]; - -static atomic_t overflow_count; - -/* - * The entries are in a hash-table, for fast lookup: - */ -#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1) -#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS) -#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1) - -#define __tstat_hashfn(entry) \ - (((unsigned long)(entry)->timer ^ \ - (unsigned long)(entry)->start_func ^ \ - (unsigned long)(entry)->expire_func ^ \ - (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK) - -#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry)) - -static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; - -static void reset_entries(void) -{ - nr_entries = 0; - memset(entries, 0, sizeof(entries)); - memset(tstat_hash_table, 0, sizeof(tstat_hash_table)); - atomic_set(&overflow_count, 0); -} - -static struct entry *alloc_entry(void) -{ - if (nr_entries >= MAX_ENTRIES) - return NULL; - - return entries + nr_entries++; -} - -static int match_entries(struct entry *entry1, struct entry *entry2) -{ - return entry1->timer == entry2->timer && - entry1->start_func == entry2->start_func && - entry1->expire_func == entry2->expire_func && - entry1->pid == entry2->pid; -} - -/* - * Look up whether an entry matching this item is present - * in the hash already. Must be called with irqs off and the - * lookup lock held: - */ -static struct entry *tstat_lookup(struct entry *entry, char *comm) -{ - struct entry **head, *curr, *prev; - - head = tstat_hashentry(entry); - curr = *head; - - /* - * The fastpath is when the entry is already hashed, - * we do this with the lookup lock held, but with the - * table lock not held: - */ - while (curr) { - if (match_entries(curr, entry)) - return curr; - - curr = curr->next; - } - /* - * Slowpath: allocate, set up and link a new hash entry: - */ - prev = NULL; - curr = *head; - - raw_spin_lock(&table_lock); - /* - * Make sure we have not raced with another CPU: - */ - while (curr) { - if (match_entries(curr, entry)) - goto out_unlock; - - prev = curr; - curr = curr->next; - } - - curr = alloc_entry(); - if (curr) { - *curr = *entry; - curr->count = 0; - curr->next = NULL; - memcpy(curr->comm, comm, TASK_COMM_LEN); - - smp_mb(); /* Ensure that curr is initialized before insert */ - - if (prev) - prev->next = curr; - else - *head = curr; - } - out_unlock: - raw_spin_unlock(&table_lock); - - return curr; -} - -/** - * timer_stats_update_stats - Update the statistics for a timer. - * @timer: pointer to either a timer_list or a hrtimer - * @pid: the pid of the task which set up the timer - * @startf: pointer to the function which did the timer setup - * @timerf: pointer to the timer callback function of the timer - * @comm: name of the process which set up the timer - * @tflags: The flags field of the timer - * - * When the timer is already registered, then the event counter is - * incremented. Otherwise the timer is registered in a free slot. 
- */ -void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char *comm, u32 tflags) -{ - /* - * It doesn't matter which lock we take: - */ - raw_spinlock_t *lock; - struct entry *entry, input; - unsigned long flags; - - if (likely(!timer_stats_active)) - return; - - lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id()); - - input.timer = timer; - input.start_func = startf; - input.expire_func = timerf; - input.pid = pid; - input.flags = tflags; - - raw_spin_lock_irqsave(lock, flags); - if (!timer_stats_active) - goto out_unlock; - - entry = tstat_lookup(&input, comm); - if (likely(entry)) - entry->count++; - else - atomic_inc(&overflow_count); - - out_unlock: - raw_spin_unlock_irqrestore(lock, flags); -} - -static void print_name_offset(struct seq_file *m, unsigned long addr) -{ - char symname[KSYM_NAME_LEN]; - - if (lookup_symbol_name(addr, symname) < 0) - seq_printf(m, "<%p>", (void *)addr); - else - seq_printf(m, "%s", symname); -} - -static int tstats_show(struct seq_file *m, void *v) -{ - struct timespec64 period; - struct entry *entry; - unsigned long ms; - long events = 0; - ktime_t time; - int i; - - mutex_lock(&show_mutex); - /* - * If still active then calculate up to now: - */ - if (timer_stats_active) - time_stop = ktime_get(); - - time = ktime_sub(time_stop, time_start); - - period = ktime_to_timespec64(time); - ms = period.tv_nsec / 1000000; - - seq_puts(m, "Timer Stats Version: v0.3\n"); - seq_printf(m, "Sample period: %ld.%03ld s\n", (long)period.tv_sec, ms); - if (atomic_read(&overflow_count)) - seq_printf(m, "Overflow: %d entries\n", atomic_read(&overflow_count)); - seq_printf(m, "Collection: %s\n", timer_stats_active ? "active" : "inactive"); - - for (i = 0; i < nr_entries; i++) { - entry = entries + i; - if (entry->flags & TIMER_DEFERRABLE) { - seq_printf(m, "%4luD, %5d %-16s ", - entry->count, entry->pid, entry->comm); - } else { - seq_printf(m, " %4lu, %5d %-16s ", - entry->count, entry->pid, entry->comm); - } - - print_name_offset(m, (unsigned long)entry->start_func); - seq_puts(m, " ("); - print_name_offset(m, (unsigned long)entry->expire_func); - seq_puts(m, ")\n"); - - events += entry->count; - } - - ms += period.tv_sec * 1000; - if (!ms) - ms = 1; - - if (events && period.tv_sec) - seq_printf(m, "%ld total events, %ld.%03ld events/sec\n", - events, events * 1000 / ms, - (events * 1000000 / ms) % 1000); - else - seq_printf(m, "%ld total events\n", events); - - mutex_unlock(&show_mutex); - - return 0; -} - -/* - * After a state change, make sure all concurrent lookup/update - * activities have stopped: - */ -static void sync_access(void) -{ - unsigned long flags; - int cpu; - - for_each_online_cpu(cpu) { - raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu); - - raw_spin_lock_irqsave(lock, flags); - /* nothing */ - raw_spin_unlock_irqrestore(lock, flags); - } -} - -static ssize_t tstats_write(struct file *file, const char __user *buf, - size_t count, loff_t *offs) -{ - char ctl[2]; - - if (count != 2 || *offs) - return -EINVAL; - - if (copy_from_user(ctl, buf, count)) - return -EFAULT; - - mutex_lock(&show_mutex); - switch (ctl[0]) { - case '0': - if (timer_stats_active) { - timer_stats_active = 0; - time_stop = ktime_get(); - sync_access(); - } - break; - case '1': - if (!timer_stats_active) { - reset_entries(); - time_start = ktime_get(); - smp_mb(); - timer_stats_active = 1; - } - break; - default: - count = -EINVAL; - } - mutex_unlock(&show_mutex); - - return count; -} - -static int tstats_open(struct inode *inode, 
struct file *filp) -{ - return single_open(filp, tstats_show, NULL); -} - -static const struct file_operations tstats_fops = { - .open = tstats_open, - .read = seq_read, - .write = tstats_write, - .llseek = seq_lseek, - .release = single_release, -}; - -void __init init_timer_stats(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu)); -} - -static int __init init_tstats_procfs(void) -{ - struct proc_dir_entry *pe; - - pe = proc_create("timer_stats", 0644, NULL, &tstats_fops); - if (!pe) - return -ENOMEM; - return 0; -} -__initcall(init_tstats_procfs);
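
The tk_clock_read() helper added in the timekeeping.c hunks above exists because the clocksource pointer must be loaded exactly once, so a concurrent change_clocksource() can never pair one clock's read() hook with another clock's state. Below is a minimal, self-contained C11 sketch of that load-once pattern; fake_clocksource, read_cycles and cur_clock are hypothetical names for illustration only, not kernel API, and atomic_load_explicit() stands in for the kernel's READ_ONCE().

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for struct clocksource: the reader must invoke the
 * read() hook belonging to the same object it loaded the pointer from. */
struct fake_clocksource {
    uint64_t (*read)(struct fake_clocksource *cs);
    uint64_t base;
};

static uint64_t read_cycles(struct fake_clocksource *cs)
{
    return cs->base; /* a real clocksource would read hardware here */
}

static struct fake_clocksource clock_a = { .read = read_cycles, .base = 1000 };

/* Equivalent of tkr->clock: an updater may switch this at any time. */
static _Atomic(struct fake_clocksource *) cur_clock = &clock_a;

/* The point of tk_clock_read(): fetch the pointer once, then call that
 * object's own read() hook, so the pointer/hook pair can never be torn. */
static uint64_t clock_read(void)
{
    struct fake_clocksource *clock =
        atomic_load_explicit(&cur_clock, memory_order_acquire);

    return clock->read(clock);
}

int main(void)
{
    printf("cycles: %llu\n", (unsigned long long)clock_read());
    return 0;
}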
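
The new jiffies64_to_nsecs() helper and the HZ_TO_NSEC_* constants emitted by timeconst.bc work by pre-dividing HZ and NSEC_PER_SEC by their greatest common divisor, so the run-time conversion needs at most one multiply and one divide. A small worked example of those constants, assuming a hypothetical HZ=300 build (any HZ that does not divide NSEC_PER_SEC evenly behaves the same way); gcd64() is an illustrative helper, not kernel code:

#include <stdint.h>
#include <stdio.h>

static uint64_t gcd64(uint64_t a, uint64_t b)
{
    while (b) {
        uint64_t t = a % b;

        a = b;
        b = t;
    }
    return a;
}

int main(void)
{
    const uint64_t hz = 300, nsec_per_sec = 1000000000;
    uint64_t cd = gcd64(hz, nsec_per_sec); /* timeconst.bc's cd=gcd(hz,1000000000) */

    printf("HZ_TO_NSEC_NUM %llu\n", (unsigned long long)(nsec_per_sec / cd));
    printf("HZ_TO_NSEC_DEN %llu\n", (unsigned long long)(hz / cd));
    return 0;
}

With HZ=300 this prints HZ_TO_NSEC_NUM 10000000 and HZ_TO_NSEC_DEN 3, so jiffies64_to_nsecs(j) evaluates div_u64(j * 10000000, 3); for an HZ that divides NSEC_PER_SEC, such as 250, the #if branch in jiffies64_to_nsecs() skips the division entirely and returns (NSEC_PER_SEC / HZ) * j.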
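
The timekeeping.c changes also move CLOCK_MONOTONIC_RAW from the old timespec64 raw_time onto the shifted-nanosecond bookkeeping the monotonic clock already uses: tkr_raw.xtime_nsec stores nanoseconds left-shifted by the clocksource shift, so accumulation can add cycle_delta * mult without dividing, carrying whole seconds into raw_sec by comparing against NSEC_PER_SEC << shift. A minimal sketch of that arithmetic with a made-up clocksource (mult = 256000, shift = 8, i.e. 1000 ns per cycle); struct raw_base and accumulate() are illustrative stand-ins, not the kernel's structures:

#include <stdint.h>
#include <stdio.h>

struct raw_base {
    uint64_t xtime_nsec; /* nanoseconds << shift */
    uint64_t sec;
    uint32_t mult;
    uint32_t shift;
};

static void accumulate(struct raw_base *b, uint64_t cycle_delta)
{
    const uint64_t snsec_per_sec = (uint64_t)1000000000 << b->shift;

    /* Add the delta in shifted nanoseconds, then normalize out seconds,
     * mirroring logarithmic_accumulation()/tk_normalize_xtime() above. */
    b->xtime_nsec += cycle_delta * b->mult;
    while (b->xtime_nsec >= snsec_per_sec) {
        b->xtime_nsec -= snsec_per_sec;
        b->sec++;
    }
}

int main(void)
{
    struct raw_base b = { .mult = 256000, .shift = 8 };

    accumulate(&b, 2000000); /* 2e6 cycles * 1000 ns/cycle = 2 s */
    printf("sec=%llu nsec=%llu\n", (unsigned long long)b.sec,
           (unsigned long long)(b.xtime_nsec >> b.shift));
    return 0;
}

Run as-is this prints sec=2 nsec=0: two million 1000 ns cycles accumulate to exactly two seconds with no per-tick division and no precision lost to early right-shifting, which is what dropping the ">> clock->shift" from raw_interval buys.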