From d8e5ddef21df05bc2a14d8c0e287cbf083da60a3 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 6 Feb 2012 16:36:48 -0800 Subject: sysinfo: Move struct sysinfo to a separate header file struct sysinfo is just about the only thing exported to userspace from , so move it into a separate header file with a residual #include in . Originally-by: H. J. Lu Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/n/tip-4pr1xnnksprt7t0h3w5fw4rv@git.kernel.org --- include/linux/kernel.h | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e8343422240a..dc6a50f88ca7 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -1,6 +1,8 @@ #ifndef _LINUX_KERNEL_H #define _LINUX_KERNEL_H +#include + /* * 'kernel.h' contains some often-used function prototypes etc */ @@ -745,27 +747,8 @@ extern int __build_bug_on_failed; # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD #endif -struct sysinfo; extern int do_sysinfo(struct sysinfo *info); #endif /* __KERNEL__ */ -#define SI_LOAD_SHIFT 16 -struct sysinfo { - long uptime; /* Seconds since boot */ - unsigned long loads[3]; /* 1, 5, and 15 minute load averages */ - unsigned long totalram; /* Total usable main memory size */ - unsigned long freeram; /* Available memory size */ - unsigned long sharedram; /* Amount of shared memory */ - unsigned long bufferram; /* Memory used by buffers */ - unsigned long totalswap; /* Total swap space size */ - unsigned long freeswap; /* swap space still available */ - unsigned short procs; /* Number of current processes */ - unsigned short pad; /* explicit padding for m68k */ - unsigned long totalhigh; /* Total high memory size */ - unsigned long freehigh; /* Available high memory size */ - unsigned int mem_unit; /* Memory unit size in bytes */ - char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */ -}; - #endif -- cgit From 93d68e52295fb8b65ded6db49e32e63b6a203e0b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 20 Mar 2012 12:28:29 -0400 Subject: tracing: Move the tracing_on/off() declarations into CONFIG_TRACING The tracing_on/off() declarations were under CONFIG_RING_BUFFER, but the functions are now only defined under CONFIG_TRACING as they are specific to ftrace and not the ring buffer. But the declarations were still defined under the ring buffer and this caused the build to fail when CONFIG_RING_BUFFER was set but CONFIG_TRACING was not. Reported-by: Ingo Molnar Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e8343422240a..51b9548fd12d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -414,16 +414,10 @@ extern int __must_check hex2bin(u8 *dst, const char *src, size_t count); * Most likely, you want to use tracing_on/tracing_off. */ #ifdef CONFIG_RING_BUFFER -void tracing_on(void); -void tracing_off(void); /* trace_off_permanent stops recording with no way to bring it back */ void tracing_off_permanent(void); -int tracing_is_on(void); #else -static inline void tracing_on(void) { } -static inline void tracing_off(void) { } static inline void tracing_off_permanent(void) { } -static inline int tracing_is_on(void) { return 0; } #endif enum ftrace_dump_mode { @@ -433,6 +427,10 @@ enum ftrace_dump_mode { }; #ifdef CONFIG_TRACING +void tracing_on(void); +void tracing_off(void); +int tracing_is_on(void); + extern void tracing_start(void); extern void tracing_stop(void); extern void ftrace_off_permanent(void); @@ -517,6 +515,11 @@ static inline void tracing_start(void) { } static inline void tracing_stop(void) { } static inline void ftrace_off_permanent(void) { } static inline void trace_dump_stack(void) { } + +static inline void tracing_on(void) { } +static inline void tracing_off(void) { } +static inline int tracing_is_on(void) { return 0; } + static inline int trace_printk(const char *fmt, ...) { -- cgit From 07d777fe8c3985bc83428c2866713c2d1b3d4129 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 Sep 2011 14:01:55 -0400 Subject: tracing: Add percpu buffers for trace_printk() Currently, trace_printk() uses a single buffer to write into to calculate the size and format needed to save the trace. To do this safely in an SMP environment, a spin_lock() is taken to only allow one writer at a time to the buffer. But this could also affect what is being traced, and add synchronization that would not be there otherwise. Ideally, using percpu buffers would be useful, but since trace_printk() is only used in development, having per cpu buffers for something never used is a waste of space. Thus, the use of the trace_bprintk() format section is changed to be used for static fmts as well as dynamic ones. Then at boot up, we can check if the section that holds the trace_printk formats is non-empty, and if it does contain something, then we know a trace_printk() has been added to the kernel. At this time the trace_printk per cpu buffers are allocated. A check is also done at module load time in case a module is added that contains a trace_printk(). Once the buffers are allocated, they are never freed. If you use a trace_printk() then you should know what you are doing. A buffer is made for each type of context: normal softirq irq nmi The context is checked and the appropriate buffer is used. This allows for totally lockless usage of trace_printk(), and they no longer even disable interrupts. Requested-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- include/linux/kernel.h | 13 ++-- kernel/trace/trace.c | 184 ++++++++++++++++++++++++++++++++------------ kernel/trace/trace.h | 2 + kernel/trace/trace_printk.c | 4 + 4 files changed, 148 insertions(+), 55 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 645231c373c8..c0d34420a913 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -480,15 +480,16 @@ do { \ #define trace_printk(fmt, args...) \ do { \ + static const char *trace_printk_fmt \ + __attribute__((section("__trace_printk_fmt"))) = \ + __builtin_constant_p(fmt) ? fmt : NULL; \ + \ __trace_printk_check_format(fmt, ##args); \ - if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt \ - __attribute__((section("__trace_printk_fmt"))) = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ \ + if (__builtin_constant_p(fmt)) \ __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ - } else \ - __trace_printk(_THIS_IP_, fmt, ##args); \ + else \ + __trace_printk(_THIS_IP_, fmt, ##args); \ } while (0) extern __printf(2, 3) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ed7b5d1e12f4..1ab8e35d069b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1498,25 +1498,119 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags) #endif /* CONFIG_STACKTRACE */ +/* created for use with alloc_percpu */ +struct trace_buffer_struct { + char buffer[TRACE_BUF_SIZE]; +}; + +static struct trace_buffer_struct *trace_percpu_buffer; +static struct trace_buffer_struct *trace_percpu_sirq_buffer; +static struct trace_buffer_struct *trace_percpu_irq_buffer; +static struct trace_buffer_struct *trace_percpu_nmi_buffer; + +/* + * The buffer used is dependent on the context. There is a per cpu + * buffer for normal context, softirq contex, hard irq context and + * for NMI context. Thise allows for lockless recording. + * + * Note, if the buffers failed to be allocated, then this returns NULL + */ +static char *get_trace_buf(void) +{ + struct trace_buffer_struct *percpu_buffer; + struct trace_buffer_struct *buffer; + + /* + * If we have allocated per cpu buffers, then we do not + * need to do any locking. + */ + if (in_nmi()) + percpu_buffer = trace_percpu_nmi_buffer; + else if (in_irq()) + percpu_buffer = trace_percpu_irq_buffer; + else if (in_softirq()) + percpu_buffer = trace_percpu_sirq_buffer; + else + percpu_buffer = trace_percpu_buffer; + + if (!percpu_buffer) + return NULL; + + buffer = per_cpu_ptr(percpu_buffer, smp_processor_id()); + + return buffer->buffer; +} + +static int alloc_percpu_trace_buffer(void) +{ + struct trace_buffer_struct *buffers; + struct trace_buffer_struct *sirq_buffers; + struct trace_buffer_struct *irq_buffers; + struct trace_buffer_struct *nmi_buffers; + + buffers = alloc_percpu(struct trace_buffer_struct); + if (!buffers) + goto err_warn; + + sirq_buffers = alloc_percpu(struct trace_buffer_struct); + if (!sirq_buffers) + goto err_sirq; + + irq_buffers = alloc_percpu(struct trace_buffer_struct); + if (!irq_buffers) + goto err_irq; + + nmi_buffers = alloc_percpu(struct trace_buffer_struct); + if (!nmi_buffers) + goto err_nmi; + + trace_percpu_buffer = buffers; + trace_percpu_sirq_buffer = sirq_buffers; + trace_percpu_irq_buffer = irq_buffers; + trace_percpu_nmi_buffer = nmi_buffers; + + return 0; + + err_nmi: + free_percpu(irq_buffers); + err_irq: + free_percpu(sirq_buffers); + err_sirq: + free_percpu(buffers); + err_warn: + WARN(1, "Could not allocate percpu trace_printk buffer"); + return -ENOMEM; +} + +void trace_printk_init_buffers(void) +{ + static int buffers_allocated; + + if (buffers_allocated) + return; + + if (alloc_percpu_trace_buffer()) + return; + + pr_info("ftrace: Allocated trace_printk buffers\n"); + + buffers_allocated = 1; +} + /** * trace_vbprintk - write binary msg to tracing buffer * */ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) { - static arch_spinlock_t trace_buf_lock = - (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; - static u32 trace_buf[TRACE_BUF_SIZE]; - struct ftrace_event_call *call = &event_bprint; struct ring_buffer_event *event; struct ring_buffer *buffer; struct trace_array *tr = &global_trace; - struct trace_array_cpu *data; struct bprint_entry *entry; unsigned long flags; - int disable; - int cpu, len = 0, size, pc; + char *tbuffer; + int len = 0, size, pc; if (unlikely(tracing_selftest_running || tracing_disabled)) return 0; @@ -1526,43 +1620,36 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) pc = preempt_count(); preempt_disable_notrace(); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disable = atomic_inc_return(&data->disabled); - if (unlikely(disable != 1)) + tbuffer = get_trace_buf(); + if (!tbuffer) { + len = 0; goto out; + } - /* Lockdep uses trace_printk for lock tracing */ - local_irq_save(flags); - arch_spin_lock(&trace_buf_lock); - len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); + len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); - if (len > TRACE_BUF_SIZE || len < 0) - goto out_unlock; + if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) + goto out; + local_save_flags(flags); size = sizeof(*entry) + sizeof(u32) * len; buffer = tr->buffer; event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, flags, pc); if (!event) - goto out_unlock; + goto out; entry = ring_buffer_event_data(event); entry->ip = ip; entry->fmt = fmt; - memcpy(entry->buf, trace_buf, sizeof(u32) * len); + memcpy(entry->buf, tbuffer, sizeof(u32) * len); if (!filter_check_discard(call, entry, buffer, event)) { ring_buffer_unlock_commit(buffer, event); ftrace_trace_stack(buffer, flags, 6, pc); } -out_unlock: - arch_spin_unlock(&trace_buf_lock); - local_irq_restore(flags); - out: - atomic_dec_return(&data->disabled); preempt_enable_notrace(); unpause_graph_tracing(); @@ -1588,58 +1675,53 @@ int trace_array_printk(struct trace_array *tr, int trace_array_vprintk(struct trace_array *tr, unsigned long ip, const char *fmt, va_list args) { - static arch_spinlock_t trace_buf_lock = __ARCH_SPIN_LOCK_UNLOCKED; - static char trace_buf[TRACE_BUF_SIZE]; - struct ftrace_event_call *call = &event_print; struct ring_buffer_event *event; struct ring_buffer *buffer; - struct trace_array_cpu *data; - int cpu, len = 0, size, pc; + int len = 0, size, pc; struct print_entry *entry; - unsigned long irq_flags; - int disable; + unsigned long flags; + char *tbuffer; if (tracing_disabled || tracing_selftest_running) return 0; + /* Don't pollute graph traces with trace_vprintk internals */ + pause_graph_tracing(); + pc = preempt_count(); preempt_disable_notrace(); - cpu = raw_smp_processor_id(); - data = tr->data[cpu]; - disable = atomic_inc_return(&data->disabled); - if (unlikely(disable != 1)) + + tbuffer = get_trace_buf(); + if (!tbuffer) { + len = 0; goto out; + } - pause_graph_tracing(); - raw_local_irq_save(irq_flags); - arch_spin_lock(&trace_buf_lock); - len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); + len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); + if (len > TRACE_BUF_SIZE) + goto out; + local_save_flags(flags); size = sizeof(*entry) + len + 1; buffer = tr->buffer; event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, - irq_flags, pc); + flags, pc); if (!event) - goto out_unlock; + goto out; entry = ring_buffer_event_data(event); entry->ip = ip; - memcpy(&entry->buf, trace_buf, len); + memcpy(&entry->buf, tbuffer, len); entry->buf[len] = '\0'; if (!filter_check_discard(call, entry, buffer, event)) { ring_buffer_unlock_commit(buffer, event); - ftrace_trace_stack(buffer, irq_flags, 6, pc); + ftrace_trace_stack(buffer, flags, 6, pc); } - - out_unlock: - arch_spin_unlock(&trace_buf_lock); - raw_local_irq_restore(irq_flags); - unpause_graph_tracing(); out: - atomic_dec_return(&data->disabled); preempt_enable_notrace(); + unpause_graph_tracing(); return len; } @@ -4955,6 +5037,10 @@ __init static int tracer_alloc_buffers(void) if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) goto out_free_buffer_mask; + /* Only allocate trace_printk buffers if a trace_printk exists */ + if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt) + trace_printk_init_buffers(); + /* To save memory, keep the ring buffer size to its minimum */ if (ring_buffer_expanded) ring_buf_size = trace_buf_size; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 95059f091a24..f9d85504f04b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -826,6 +826,8 @@ extern struct list_head ftrace_events; extern const char *__start___trace_bprintk_fmt[]; extern const char *__stop___trace_bprintk_fmt[]; +void trace_printk_init_buffers(void); + #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ extern struct ftrace_event_call \ diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 6fd4ffd042f9..a9077c1b4ad3 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -51,6 +51,10 @@ void hold_module_trace_bprintk_format(const char **start, const char **end) const char **iter; char *fmt; + /* allocate the trace_printk per cpu buffers */ + if (start != end) + trace_printk_init_buffers(); + mutex_lock(&btrace_mutex); for (iter = start; iter < end; iter++) { struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); -- cgit From 446969084d33a4064a39d280806da642c54ba4ac Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 23 May 2012 20:12:50 -0700 Subject: kernel: Move REPEAT_BYTE definition into linux/kernel.h And make sure that everything using it explicitly includes that header file. Signed-off-by: David S. Miller --- arch/sparc/lib/usercopy.c | 3 +-- arch/x86/include/asm/word-at-a-time.h | 4 ++-- fs/namei.c | 1 + include/linux/kernel.h | 2 ++ 4 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/arch/sparc/lib/usercopy.c b/arch/sparc/lib/usercopy.c index f61ed820cb61..0b12e91d6ccc 100644 --- a/arch/sparc/lib/usercopy.c +++ b/arch/sparc/lib/usercopy.c @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -11,8 +12,6 @@ void copy_from_user_overflow(void) } EXPORT_SYMBOL(copy_from_user_overflow); -#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) - static inline long find_zero(unsigned long mask) { long byte = 0; diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index e58f03b206c3..ae03facfadd6 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -1,6 +1,8 @@ #ifndef _ASM_WORD_AT_A_TIME_H #define _ASM_WORD_AT_A_TIME_H +#include + /* * This is largely generic for little-endian machines, but the * optimal byte mask counting is probably going to be something @@ -35,8 +37,6 @@ static inline long count_masked_bytes(long mask) #endif -#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) - /* Return the high bit set in the first byte that is a zero */ static inline unsigned long has_zero(unsigned long a) { diff --git a/fs/namei.c b/fs/namei.c index f9e883c1b856..8d2ba420e42f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 645231c373c8..fbe9bfacb8db 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -38,6 +38,8 @@ #define STACK_MAGIC 0xdeadbeef +#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) + #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) #define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) -- cgit From a3860c1c5dd1137db23d7786d284939c5761d517 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Thu, 31 May 2012 16:26:04 -0700 Subject: introduce SIZE_MAX ULONG_MAX is often used to check for integer overflow when calculating allocation size. While ULONG_MAX happens to work on most systems, there is no guarantee that `size_t' must be the same size as `long'. This patch introduces SIZE_MAX, the maximum value of `size_t', to improve portability and readability for allocation size validation. Signed-off-by: Xi Wang Acked-by: Alex Elder Cc: David Airlie Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ceph/snap.c | 2 +- include/drm/drm_mem_util.h | 4 ++-- include/linux/kernel.h | 1 + include/linux/slab.h | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux/kernel.h') diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index f04c0961f993..e5206fc76562 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -331,7 +331,7 @@ static int build_snap_context(struct ceph_snap_realm *realm) /* alloc new snap context */ err = -ENOMEM; - if (num > (ULONG_MAX - sizeof(*snapc)) / sizeof(u64)) + if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64)) goto fail; snapc = kzalloc(sizeof(*snapc) + num*sizeof(u64), GFP_NOFS); if (!snapc) diff --git a/include/drm/drm_mem_util.h b/include/drm/drm_mem_util.h index 6bd325fedc87..19a240446fca 100644 --- a/include/drm/drm_mem_util.h +++ b/include/drm/drm_mem_util.h @@ -31,7 +31,7 @@ static __inline__ void *drm_calloc_large(size_t nmemb, size_t size) { - if (size != 0 && nmemb > ULONG_MAX / size) + if (size != 0 && nmemb > SIZE_MAX / size) return NULL; if (size * nmemb <= PAGE_SIZE) @@ -44,7 +44,7 @@ static __inline__ void *drm_calloc_large(size_t nmemb, size_t size) /* Modeled after cairo's malloc_ab, it's like calloc but without the zeroing. */ static __inline__ void *drm_malloc_ab(size_t nmemb, size_t size) { - if (size != 0 && nmemb > ULONG_MAX / size) + if (size != 0 && nmemb > SIZE_MAX / size) return NULL; if (size * nmemb <= PAGE_SIZE) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index ec55a3c8ba77..e07f5e0c5df4 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -35,6 +35,7 @@ #define LLONG_MAX ((long long)(~0ULL>>1)) #define LLONG_MIN (-LLONG_MAX - 1) #define ULLONG_MAX (~0ULL) +#define SIZE_MAX (~(size_t)0) #define STACK_MAGIC 0xdeadbeef diff --git a/include/linux/slab.h b/include/linux/slab.h index a595dce6b0c7..67d5d94b783a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -242,7 +242,7 @@ size_t ksize(const void *); */ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags) { - if (size != 0 && n > ULONG_MAX / size) + if (size != 0 && n > SIZE_MAX / size) return NULL; return __kmalloc(n * size, flags); } -- cgit