diff options
Diffstat (limited to 'samples')
| -rw-r--r-- | samples/Kconfig | 7 | ||||
| -rw-r--r-- | samples/Makefile | 2 | ||||
| -rw-r--r-- | samples/bpf/test_maps.c | 4 | ||||
| -rw-r--r-- | samples/livepatch/Makefile | 1 | ||||
| -rw-r--r-- | samples/livepatch/livepatch-sample.c | 91 | ||||
| -rw-r--r-- | samples/trace_events/trace-events-sample.c | 80 | ||||
| -rw-r--r-- | samples/trace_events/trace-events-sample.h | 328 | 
7 files changed, 496 insertions, 17 deletions
| diff --git a/samples/Kconfig b/samples/Kconfig index 6181c2cc9ca0..224ebb46bed5 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -63,4 +63,11 @@ config SAMPLE_RPMSG_CLIENT  	  to communicate with an AMP-configured remote processor over  	  the rpmsg bus. +config SAMPLE_LIVEPATCH +	tristate "Build live patching sample -- loadable modules only" +	depends on LIVEPATCH && m +	help +	  Builds a sample live patch that replaces the procfs handler +	  for /proc/cmdline to print "this has been live patched". +  endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index 1a60c62e2045..f00257bcc5a7 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -1,4 +1,4 @@  # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ trace_events/ \ +obj-$(CONFIG_SAMPLES)	+= kobject/ kprobes/ trace_events/ livepatch/ \  			   hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c index e286b42307f3..6299ee95cd11 100644 --- a/samples/bpf/test_maps.c +++ b/samples/bpf/test_maps.c @@ -69,9 +69,9 @@ static void test_hashmap_sanity(int i, void *data)  	/* iterate over two elements */  	assert(bpf_get_next_key(map_fd, &key, &next_key) == 0 && -	       next_key == 2); +	       (next_key == 1 || next_key == 2));  	assert(bpf_get_next_key(map_fd, &next_key, &next_key) == 0 && -	       next_key == 1); +	       (next_key == 1 || next_key == 2));  	assert(bpf_get_next_key(map_fd, &next_key, &next_key) == -1 &&  	       errno == ENOENT); diff --git a/samples/livepatch/Makefile b/samples/livepatch/Makefile new file mode 100644 index 000000000000..10319d7ea0b1 --- /dev/null +++ b/samples/livepatch/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_SAMPLE_LIVEPATCH) += livepatch-sample.o diff --git a/samples/livepatch/livepatch-sample.c b/samples/livepatch/livepatch-sample.c new file mode 100644 index 000000000000..fb8c8614e728 --- /dev/null +++ b/samples/livepatch/livepatch-sample.c @@ -0,0 +1,91 @@ +/* + * livepatch-sample.c - Kernel Live Patching Sample Module + * + * Copyright (C) 2014 Seth Jennings <[email protected]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/livepatch.h> + +/* + * This (dumb) live patch overrides the function that prints the + * kernel boot cmdline when /proc/cmdline is read. + * + * Example: + * + * $ cat /proc/cmdline + * <your cmdline> + * + * $ insmod livepatch-sample.ko + * $ cat /proc/cmdline + * this has been live patched + * + * $ echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled + * $ cat /proc/cmdline + * <your cmdline> + */ + +#include <linux/seq_file.h> +static int livepatch_cmdline_proc_show(struct seq_file *m, void *v) +{ +	seq_printf(m, "%s\n", "this has been live patched"); +	return 0; +} + +static struct klp_func funcs[] = { +	{ +		.old_name = "cmdline_proc_show", +		.new_func = livepatch_cmdline_proc_show, +	}, { } +}; + +static struct klp_object objs[] = { +	{ +		/* name being NULL means vmlinux */ +		.funcs = funcs, +	}, { } +}; + +static struct klp_patch patch = { +	.mod = THIS_MODULE, +	.objs = objs, +}; + +static int livepatch_init(void) +{ +	int ret; + +	ret = klp_register_patch(&patch); +	if (ret) +		return ret; +	ret = klp_enable_patch(&patch); +	if (ret) { +		WARN_ON(klp_unregister_patch(&patch)); +		return ret; +	} +	return 0; +} + +static void livepatch_exit(void) +{ +	WARN_ON(klp_disable_patch(&patch)); +	WARN_ON(klp_unregister_patch(&patch)); +} + +module_init(livepatch_init); +module_exit(livepatch_exit); +MODULE_LICENSE("GPL"); diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index aabc4e970911..880a7d1d27d2 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -10,12 +10,38 @@  #define CREATE_TRACE_POINTS  #include "trace-events-sample.h" +static const char *random_strings[] = { +	"Mother Goose", +	"Snoopy", +	"Gandalf", +	"Frodo", +	"One ring to rule them all" +};  static void simple_thread_func(int cnt)  { +	int array[6]; +	int len = cnt % 5; +	int i; +  	set_current_state(TASK_INTERRUPTIBLE);  	schedule_timeout(HZ); -	trace_foo_bar("hello", cnt); + +	for (i = 0; i < len; i++) +		array[i] = i + 1; +	array[i] = 0; + +	/* Silly tracepoints */ +	trace_foo_bar("hello", cnt, array, random_strings[len], +		      tsk_cpus_allowed(current)); + +	trace_foo_with_template_simple("HELLO", cnt); + +	trace_foo_bar_with_cond("Some times print", cnt); + +	trace_foo_with_template_cond("prints other times", cnt); + +	trace_foo_with_template_print("I have to be different", cnt);  }  static int simple_thread(void *arg) @@ -29,6 +55,53 @@ static int simple_thread(void *arg)  }  static struct task_struct *simple_tsk; +static struct task_struct *simple_tsk_fn; + +static void simple_thread_func_fn(int cnt) +{ +	set_current_state(TASK_INTERRUPTIBLE); +	schedule_timeout(HZ); + +	/* More silly tracepoints */ +	trace_foo_bar_with_fn("Look at me", cnt); +	trace_foo_with_template_fn("Look at me too", cnt); +} + +static int simple_thread_fn(void *arg) +{ +	int cnt = 0; + +	while (!kthread_should_stop()) +		simple_thread_func_fn(cnt++); + +	return 0; +} + +static DEFINE_MUTEX(thread_mutex); + +void foo_bar_reg(void) +{ +	pr_info("Starting thread for foo_bar_fn\n"); +	/* +	 * We shouldn't be able to start a trace when the module is +	 * unloading (there's other locks to prevent that). But +	 * for consistency sake, we still take the thread_mutex. +	 */ +	mutex_lock(&thread_mutex); +	simple_tsk_fn = kthread_run(simple_thread_fn, NULL, "event-sample-fn"); +	mutex_unlock(&thread_mutex); +} + +void foo_bar_unreg(void) +{ +	pr_info("Killing thread for foo_bar_fn\n"); +	/* protect against module unloading */ +	mutex_lock(&thread_mutex); +	if (simple_tsk_fn) +		kthread_stop(simple_tsk_fn); +	simple_tsk_fn = NULL; +	mutex_unlock(&thread_mutex); +}  static int __init trace_event_init(void)  { @@ -42,6 +115,11 @@ static int __init trace_event_init(void)  static void __exit trace_event_exit(void)  {  	kthread_stop(simple_tsk); +	mutex_lock(&thread_mutex); +	if (simple_tsk_fn) +		kthread_stop(simple_tsk_fn); +	simple_tsk_fn = NULL; +	mutex_unlock(&thread_mutex);  }  module_init(trace_event_init); diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 476429281389..a2c8b02b6359 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -1,6 +1,6 @@  /*   * If TRACE_SYSTEM is defined, that will be the directory created - * in the ftrace directory under /sys/kernel/debug/tracing/events/<system> + * in the ftrace directory under /sys/kernel/tracing/events/<system>   *   * The define_trace.h below will also look for a file name of   * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here. @@ -54,45 +54,347 @@   *    Here it is simply "foo, bar".   *   * struct:  This defines the way the data will be stored in the ring buffer. - *    There are currently two types of elements. __field and __array. - *    a __field is broken up into (type, name). Where type can be any - *    primitive type (integer, long or pointer). __field_struct() can - *    be any static complex data value (struct, union, but not an array). - *    For an array. there are three fields. (type, name, size). The - *    type of elements in the array, the name of the field and the size - *    of the array. + *          The items declared here become part of a special structure + *          called "__entry", which can be used in the fast_assign part of the + *          TRACE_EVENT macro. + * + *      Here are the currently defined types you can use: + * + *   __field : Is broken up into type and name. Where type can be any + *         primitive type (integer, long or pointer). + * + *        __field(int, foo) + * + *        __entry->foo = 5; + * + *   __field_struct : This can be any static complex data type (struct, union + *         but not an array). Be careful using complex types, as each + *         event is limited in size, and copying large amounts of data + *         into the ring buffer can slow things down. + * + *         __field_struct(struct bar, foo) + * + *         __entry->bar.x = y; + + *   __array: There are three fields (type, name, size). The type is the + *         type of elements in teh array, the name is the name of the array. + *         size is the number of items in the array (not the total size). + * + *         __array( char, foo, 10) is the same as saying: char foo[10]; + * + *         Assigning arrays can be done like any array: + * + *         __entry->foo[0] = 'a'; + * + *         memcpy(__entry->foo, bar, 10); + * + *   __dynamic_array: This is similar to array, but can vary is size from + *         instance to instance of the tracepoint being called. + *         Like __array, this too has three elements (type, name, size); + *         type is the type of the element, name is the name of the array. + *         The size is different than __array. It is not a static number, + *         but the algorithm to figure out the length of the array for the + *         specific instance of tracepoint. Again, size is the numebr of + *         items in the array, not the total length in bytes. + * + *         __dynamic_array( int, foo, bar) is similar to: int foo[bar]; + * + *         Note, unlike arrays, you must use the __get_dynamic_array() macro + *         to access the array. + * + *         memcpy(__get_dynamic_array(foo), bar, 10); + * + *         Notice, that "__entry" is not needed here. + * + *   __string: This is a special kind of __dynamic_array. It expects to + *         have a nul terminated character array passed to it (it allows + *         for NULL too, which would be converted into "(null)"). __string + *         takes two paramenter (name, src), where name is the name of + *         the string saved, and src is the string to copy into the + *         ring buffer. + * + *         __string(foo, bar)  is similar to:  strcpy(foo, bar) + * + *         To assign a string, use the helper macro __assign_str(). + * + *         __assign_str(foo, bar); + * + *         In most cases, the __assign_str() macro will take the same + *         parameters as the __string() macro had to declare the string. + * + *   __bitmask: This is another kind of __dynamic_array, but it expects + *         an array of longs, and the number of bits to parse. It takes + *         two parameters (name, nr_bits), where name is the name of the + *         bitmask to save, and the nr_bits is the number of bits to record. + * + *         __bitmask(target_cpu, nr_cpumask_bits) + * + *         To assign a bitmask, use the __assign_bitmask() helper macro. + * + *         __assign_bitmask(target_cpus, cpumask_bits(bar), nr_cpumask_bits);   * - *    __array( char, foo, 10) is the same as saying   char foo[10].   *   * fast_assign: This is a C like function that is used to store the items - *    into the ring buffer. + *    into the ring buffer. A special variable called "__entry" will be the + *    structure that points into the ring buffer and has the same fields as + *    described by the struct part of TRACE_EVENT above.   *   * printk: This is a way to print out the data in pretty print. This is   *    useful if the system crashes and you are logging via a serial line,   *    the data can be printed to the console using this "printk" method. + *    This is also used to print out the data from the trace files. + *    Again, the __entry macro is used to access the data from the ring buffer. + * + *    Note, __dynamic_array, __string, and __bitmask require special helpers + *       to access the data. + * + *      For __dynamic_array(int, foo, bar) use __get_dynamic_array(foo) + *            Use __get_dynamic_array_len(foo) to get the length of the array + *            saved. + * + *      For __string(foo, bar) use __get_str(foo) + * + *      For __bitmask(target_cpus, nr_cpumask_bits) use __get_bitmask(target_cpus) + *   *   * Note, that for both the assign and the printk, __entry is the handler   * to the data structure in the ring buffer, and is defined by the   * TP_STRUCT__entry.   */ + +/* + * It is OK to have helper functions in the file, but they need to be protected + * from being defined more than once. Remember, this file gets included more + * than once. + */ +#ifndef __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS +#define __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS +static inline int __length_of(const int *list) +{ +	int i; + +	if (!list) +		return 0; + +	for (i = 0; list[i]; i++) +		; +	return i; +} +#endif +  TRACE_EVENT(foo_bar, -	TP_PROTO(char *foo, int bar), +	TP_PROTO(const char *foo, int bar, const int *lst, +		 const char *string, const struct cpumask *mask), -	TP_ARGS(foo, bar), +	TP_ARGS(foo, bar, lst, string, mask),  	TP_STRUCT__entry(  		__array(	char,	foo,    10		)  		__field(	int,	bar			) +		__dynamic_array(int,	list,   __length_of(lst)) +		__string(	str,	string			) +		__bitmask(	cpus,	num_possible_cpus()	)  	),  	TP_fast_assign(  		strlcpy(__entry->foo, foo, 10);  		__entry->bar	= bar; +		memcpy(__get_dynamic_array(list), lst, +		       __length_of(lst) * sizeof(int)); +		__assign_str(str, string); +		__assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus()); +	), + +	TP_printk("foo %s %d %s %s (%s)", __entry->foo, __entry->bar, +		  __print_array(__get_dynamic_array(list), +				__get_dynamic_array_len(list), +				sizeof(int)), +		  __get_str(str), __get_bitmask(cpus)) +); + +/* + * There may be a case where a tracepoint should only be called if + * some condition is set. Otherwise the tracepoint should not be called. + * But to do something like: + * + *  if (cond) + *     trace_foo(); + * + * Would cause a little overhead when tracing is not enabled, and that + * overhead, even if small, is not something we want. As tracepoints + * use static branch (aka jump_labels), where no branch is taken to + * skip the tracepoint when not enabled, and a jmp is placed to jump + * to the tracepoint code when it is enabled, having a if statement + * nullifies that optimization. It would be nice to place that + * condition within the static branch. This is where TRACE_EVENT_CONDITION + * comes in. + * + * TRACE_EVENT_CONDITION() is just like TRACE_EVENT, except it adds another + * parameter just after args. Where TRACE_EVENT has: + * + * TRACE_EVENT(name, proto, args, struct, assign, printk) + * + * the CONDITION version has: + * + * TRACE_EVENT_CONDITION(name, proto, args, cond, struct, assign, printk) + * + * Everything is the same as TRACE_EVENT except for the new cond. Think + * of the cond variable as: + * + *   if (cond) + *      trace_foo_bar_with_cond(); + * + * Except that the logic for the if branch is placed after the static branch. + * That is, the if statement that processes the condition will not be + * executed unless that traecpoint is enabled. Otherwise it still remains + * a nop. + */ +TRACE_EVENT_CONDITION(foo_bar_with_cond, + +	TP_PROTO(const char *foo, int bar), + +	TP_ARGS(foo, bar), + +	TP_CONDITION(!(bar % 10)), + +	TP_STRUCT__entry( +		__string(	foo,    foo		) +		__field(	int,	bar			) +	), + +	TP_fast_assign( +		__assign_str(foo, foo); +		__entry->bar	= bar; +	), + +	TP_printk("foo %s %d", __get_str(foo), __entry->bar) +); + +void foo_bar_reg(void); +void foo_bar_unreg(void); + +/* + * Now in the case that some function needs to be called when the + * tracepoint is enabled and/or when it is disabled, the + * TRACE_EVENT_FN() serves this purpose. This is just like TRACE_EVENT() + * but adds two more parameters at the end: + * + * TRACE_EVENT_FN( name, proto, args, struct, assign, printk, reg, unreg) + * + * reg and unreg are functions with the prototype of: + * + *    void reg(void) + * + * The reg function gets called before the tracepoint is enabled, and + * the unreg function gets called after the tracepoint is disabled. + * + * Note, reg and unreg are allowed to be NULL. If you only need to + * call a function before enabling, or after disabling, just set one + * function and pass in NULL for the other parameter. + */ +TRACE_EVENT_FN(foo_bar_with_fn, + +	TP_PROTO(const char *foo, int bar), + +	TP_ARGS(foo, bar), + +	TP_STRUCT__entry( +		__string(	foo,    foo		) +		__field(	int,	bar		) +	), + +	TP_fast_assign( +		__assign_str(foo, foo); +		__entry->bar	= bar; +	), + +	TP_printk("foo %s %d", __get_str(foo), __entry->bar), + +	foo_bar_reg, foo_bar_unreg +); + +/* + * Each TRACE_EVENT macro creates several helper functions to produce + * the code to add the tracepoint, create the files in the trace + * directory, hook it to perf, assign the values and to print out + * the raw data from the ring buffer. To prevent too much bloat, + * if there are more than one tracepoint that uses the same format + * for the proto, args, struct, assign and printk, and only the name + * is different, it is highly recommended to use the DECLARE_EVENT_CLASS + * + * DECLARE_EVENT_CLASS() macro creates most of the functions for the + * tracepoint. Then DEFINE_EVENT() is use to hook a tracepoint to those + * functions. This DEFINE_EVENT() is an instance of the class and can + * be enabled and disabled separately from other events (either TRACE_EVENT + * or other DEFINE_EVENT()s). + * + * Note, TRACE_EVENT() itself is simply defined as: + * + * #define TRACE_EVENT(name, proto, args, tstruct, assign, printk)  \ + *  DEFINE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \ + *  DEFINE_EVENT(name, name, proto, args) + * + * The DEFINE_EVENT() also can be declared with conditions and reg functions: + * + * DEFINE_EVENT_CONDITION(template, name, proto, args, cond); + * DEFINE_EVENT_FN(template, name, proto, args, reg, unreg); + */ +DECLARE_EVENT_CLASS(foo_template, + +	TP_PROTO(const char *foo, int bar), + +	TP_ARGS(foo, bar), + +	TP_STRUCT__entry( +		__string(	foo,    foo		) +		__field(	int,	bar		) +	), + +	TP_fast_assign( +		__assign_str(foo, foo); +		__entry->bar	= bar;  	), -	TP_printk("foo %s %d", __entry->foo, __entry->bar) +	TP_printk("foo %s %d", __get_str(foo), __entry->bar)  ); + +/* + * Here's a better way for the previous samples (except, the first + * exmaple had more fields and could not be used here). + */ +DEFINE_EVENT(foo_template, foo_with_template_simple, +	TP_PROTO(const char *foo, int bar), +	TP_ARGS(foo, bar)); + +DEFINE_EVENT_CONDITION(foo_template, foo_with_template_cond, +	TP_PROTO(const char *foo, int bar), +	TP_ARGS(foo, bar), +	TP_CONDITION(!(bar % 8))); + + +DEFINE_EVENT_FN(foo_template, foo_with_template_fn, +	TP_PROTO(const char *foo, int bar), +	TP_ARGS(foo, bar), +	foo_bar_reg, foo_bar_unreg); + +/* + * Anytime two events share basically the same values and have + * the same output, use the DECLARE_EVENT_CLASS() and DEFINE_EVENT() + * when ever possible. + */ + +/* + * If the event is similar to the DECLARE_EVENT_CLASS, but you need + * to have a different output, then use DEFINE_EVENT_PRINT() which + * lets you override the TP_printk() of the class. + */ + +DEFINE_EVENT_PRINT(foo_template, foo_with_template_print, +	TP_PROTO(const char *foo, int bar), +	TP_ARGS(foo, bar), +	TP_printk("bar %s %d", __get_str(foo), __entry->bar)); +  #endif  /***** NOTICE! The #if protection ends here. *****/ |