From 3292beb340c76884427faa1f5d6085719477d889 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Mon, 28 Nov 2011 14:45:17 -0200 Subject: sched/accounting: Change cpustat fields to an array This patch changes fields in cpustat from a structure, to an u64 array. Math gets easier, and the code is more flexible. Signed-off-by: Glauber Costa Reviewed-by: KAMEZAWA Hiroyuki Cc: Linus Torvalds Cc: Andrew Morton Cc: Paul Tuner Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1322498719-2255-2-git-send-email-glommer@parallels.com Signed-off-by: Ingo Molnar --- fs/proc/stat.c | 63 +++++++++++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 34 deletions(-) (limited to 'fs/proc/stat.c') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 42b274da92c3..8a6ab666e9f8 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -22,29 +22,27 @@ #define arch_idle_time(cpu) 0 #endif -static cputime64_t get_idle_time(int cpu) +static u64 get_idle_time(int cpu) { - u64 idle_time = get_cpu_idle_time_us(cpu, NULL); - cputime64_t idle; + u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL); if (idle_time == -1ULL) { /* !NO_HZ so we can rely on cpustat.idle */ - idle = kstat_cpu(cpu).cpustat.idle; - idle = cputime64_add(idle, arch_idle_time(cpu)); + idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; + idle += arch_idle_time(cpu); } else idle = usecs_to_cputime(idle_time); return idle; } -static cputime64_t get_iowait_time(int cpu) +static u64 get_iowait_time(int cpu) { - u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL); - cputime64_t iowait; + u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL); if (iowait_time == -1ULL) /* !NO_HZ so we can rely on cpustat.iowait */ - iowait = kstat_cpu(cpu).cpustat.iowait; + iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; else iowait = usecs_to_cputime(iowait_time); @@ -55,33 +53,30 @@ static int show_stat(struct seq_file *p, void *v) { int i, j; unsigned long jif; - cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; - cputime64_t guest, guest_nice; + u64 user, nice, system, idle, iowait, irq, softirq, steal; + u64 guest, guest_nice; u64 sum = 0; u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; struct timespec boottime; user = nice = system = idle = iowait = - irq = softirq = steal = cputime64_zero; - guest = guest_nice = cputime64_zero; + irq = softirq = steal = 0; + guest = guest_nice = 0; getboottime(&boottime); jif = boottime.tv_sec; for_each_possible_cpu(i) { - user = cputime64_add(user, kstat_cpu(i).cpustat.user); - nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); - system = cputime64_add(system, kstat_cpu(i).cpustat.system); - idle = cputime64_add(idle, get_idle_time(i)); - iowait = cputime64_add(iowait, get_iowait_time(i)); - irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); - softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); - steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); - guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); - guest_nice = cputime64_add(guest_nice, - kstat_cpu(i).cpustat.guest_nice); - sum += kstat_cpu_irqs_sum(i); - sum += arch_irq_stat_cpu(i); + user += kcpustat_cpu(i).cpustat[CPUTIME_USER]; + nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE]; + system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]; + idle += get_idle_time(i); + iowait += get_iowait_time(i); + irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ]; + softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]; + steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; + guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; + guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; for (j = 0; j < NR_SOFTIRQS; j++) { unsigned int softirq_stat = kstat_softirqs_cpu(j, i); @@ -106,16 +101,16 @@ static int show_stat(struct seq_file *p, void *v) (unsigned long long)cputime64_to_clock_t(guest_nice)); for_each_online_cpu(i) { /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ - user = kstat_cpu(i).cpustat.user; - nice = kstat_cpu(i).cpustat.nice; - system = kstat_cpu(i).cpustat.system; + user = kcpustat_cpu(i).cpustat[CPUTIME_USER]; + nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE]; + system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]; idle = get_idle_time(i); iowait = get_iowait_time(i); - irq = kstat_cpu(i).cpustat.irq; - softirq = kstat_cpu(i).cpustat.softirq; - steal = kstat_cpu(i).cpustat.steal; - guest = kstat_cpu(i).cpustat.guest; - guest_nice = kstat_cpu(i).cpustat.guest_nice; + irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ]; + softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]; + steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; + guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; + guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " "%llu\n", -- cgit From 2a95ea6c0d129b4568fb64e1deda16ceb20e6636 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 8 Dec 2011 14:34:32 -0800 Subject: procfs: do not overflow get_{idle,iowait}_time for nohz Since commit a25cac5198d4 ("proc: Consider NO_HZ when printing idle and iowait times") we are reporting idle/io_wait time also while a CPU is tickless. We rely on get_{idle,iowait}_time functions to retrieve proper data. These functions, however, use usecs_to_cputime to translate micro seconds time to cputime64_t. This is just an alias to usecs_to_jiffies which reduces the data type from u64 to unsigned int and also checks whether the given parameter overflows jiffies_to_usecs(MAX_JIFFY_OFFSET) and returns MAX_JIFFY_OFFSET in that case. When we overflow depends on CONFIG_HZ but especially for CONFIG_HZ_300 it is quite low (1431649781) so we are getting MAX_JIFFY_OFFSET for >3000s! until we overflow unsigned int. Just for reference CONFIG_HZ_100 has an overflow window around 20s, CONFIG_HZ_250 ~8s and CONFIG_HZ_1000 ~2s. This results in a bug when people saw [h]top going mad reporting 100% CPU usage even though there was basically no CPU load. The reason was simply that /proc/stat stopped reporting idle/io_wait changes (and reported MAX_JIFFY_OFFSET) and so the only change happening was for user system time. Let's use nsecs_to_jiffies64 instead which doesn't reduce the precision to 32b type and it is much more appropriate for cumulative time values (unlike usecs_to_jiffies which intended for timeout calculations). Signed-off-by: Michal Hocko Tested-by: Artem S. Tashkinov Cc: Dave Jones Cc: Arnd Bergmann Cc: Alexey Dobriyan Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/stat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/proc/stat.c') diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 42b274da92c3..2a30d67dd6b8 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -32,7 +32,7 @@ static cputime64_t get_idle_time(int cpu) idle = kstat_cpu(cpu).cpustat.idle; idle = cputime64_add(idle, arch_idle_time(cpu)); } else - idle = usecs_to_cputime(idle_time); + idle = nsecs_to_jiffies64(1000 * idle_time); return idle; } @@ -46,7 +46,7 @@ static cputime64_t get_iowait_time(int cpu) /* !NO_HZ so we can rely on cpustat.iowait */ iowait = kstat_cpu(cpu).cpustat.iowait; else - iowait = usecs_to_cputime(iowait_time); + iowait = nsecs_to_jiffies64(1000 * iowait_time); return iowait; } -- cgit From 34845636a184f3be91a531098192592cbe6db587 Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Wed, 28 Dec 2011 15:57:15 -0800 Subject: procfs: do not confuse jiffies with cputime64_t Commit 2a95ea6c0d129b4 ("procfs: do not overflow get_{idle,iowait}_time for nohz") did not take into account that one some architectures jiffies and cputime use different units. This causes get_idle_time() to return numbers in the wrong units, making the idle time fields in /proc/stat wrong. Instead of converting the usec value returned by get_cpu_{idle,iowait}_time_us to units of jiffies, use the new function usecs_to_cputime64 to convert it to the correct unit of cputime64_t. Signed-off-by: Andreas Schwab Acked-by: Michal Hocko Cc: Arnd Bergmann Cc: "Artem S. Tashkinov" Cc: Dave Jones Cc: Alexey Dobriyan Cc: Thomas Gleixner Cc: "Luck, Tony" Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/cputime.h | 1 + arch/powerpc/include/asm/cputime.h | 2 ++ arch/s390/include/asm/cputime.h | 2 ++ fs/proc/stat.c | 4 ++-- include/asm-generic/cputime.h | 1 + 5 files changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/proc/stat.c') diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 6073b187528a..5a274af31b2b 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -60,6 +60,7 @@ typedef u64 cputime64_t; */ #define cputime_to_usecs(__ct) ((__ct) / NSEC_PER_USEC) #define usecs_to_cputime(__usecs) ((__usecs) * NSEC_PER_USEC) +#define usecs_to_cputime64(__usecs) usecs_to_cputime(__usecs) /* * Convert cputime <-> seconds diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 1cf20bdfbeca..98b7c4b49c9d 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -150,6 +150,8 @@ static inline cputime_t usecs_to_cputime(const unsigned long us) return ct; } +#define usecs_to_cputime64(us) usecs_to_cputime(us) + /* * Convert cputime <-> seconds */ diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 081434878296..b9acaaa175d8 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -87,6 +87,8 @@ usecs_to_cputime(const unsigned int m) return (cputime_t) m * 4096; } +#define usecs_to_cputime64(m) usecs_to_cputime(m) + /* * Convert cputime to milliseconds and back. */ diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 2a30d67dd6b8..0855e6f20391 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -32,7 +32,7 @@ static cputime64_t get_idle_time(int cpu) idle = kstat_cpu(cpu).cpustat.idle; idle = cputime64_add(idle, arch_idle_time(cpu)); } else - idle = nsecs_to_jiffies64(1000 * idle_time); + idle = usecs_to_cputime64(idle_time); return idle; } @@ -46,7 +46,7 @@ static cputime64_t get_iowait_time(int cpu) /* !NO_HZ so we can rely on cpustat.iowait */ iowait = kstat_cpu(cpu).cpustat.iowait; else - iowait = nsecs_to_jiffies64(1000 * iowait_time); + iowait = usecs_to_cputime64(iowait_time); return iowait; } diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index 62ce6823c0f2..12a1764f612b 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -40,6 +40,7 @@ typedef u64 cputime64_t; */ #define cputime_to_usecs(__ct) jiffies_to_usecs(__ct) #define usecs_to_cputime(__msecs) usecs_to_jiffies(__msecs) +#define usecs_to_cputime64(__msecs) nsecs_to_jiffies64((__msecs) * 1000) /* * Convert cputime to seconds and back. -- cgit