Diffstat (limited to 'kernel/trace')

 kernel/trace/Kconfig                 |   9
 kernel/trace/ftrace.c                |  28
 kernel/trace/ring_buffer.c           |  44
 kernel/trace/ring_buffer_benchmark.c |   2
 kernel/trace/rv/monitors/wip/wip.h   |   2
 kernel/trace/rv/monitors/wwnr/wwnr.h |   2
 kernel/trace/trace.c                 | 106
 kernel/trace/trace.h                 |  31
 kernel/trace/trace_event_perf.c      |  16
 kernel/trace/trace_events.c          |  66
 kernel/trace/trace_events_hist.c     | 190
 kernel/trace/trace_events_synth.c    |   2
 kernel/trace/trace_events_trigger.c  |  19
 kernel/trace/trace_events_user.c     |   1
 kernel/trace/trace_kprobe.c          |   2
 kernel/trace/trace_osnoise.c         | 244
 kernel/trace/trace_output.c          |  71
 kernel/trace/trace_probe.c           |  67
 kernel/trace/trace_probe.h           |  19
 kernel/trace/trace_probe_tmpl.h      |  47
 kernel/trace/trace_uprobe.c          |   3

21 files changed, 753 insertions, 218 deletions
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2c6611c13f99..197545241ab8 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,6 +82,13 @@ config HAVE_OBJTOOL_MCOUNT
 	help
 	  Arch supports objtool --mcount
 
+config HAVE_OBJTOOL_NOP_MCOUNT
+	bool
+	help
+	  Arch supports the objtool options --mcount with --mnop.
+	  An architecture can select this if it wants to enable nop'ing
+	  of ftrace locations.
+
 config HAVE_C_RECORDMCOUNT
 	bool
 	help
@@ -375,6 +382,7 @@ config SCHED_TRACER
 config HWLAT_TRACER
 	bool "Tracer to detect hardware latencies (like SMIs)"
 	select GENERIC_TRACER
+	select TRACER_MAX_TRACE
 	help
 	 This tracer, when enabled will create one or more kernel threads,
 	 depending on what the cpumask file is set to, which each thread
@@ -410,6 +418,7 @@ config HWLAT_TRACER
 config OSNOISE_TRACER
 	bool "OS Noise tracer"
 	select GENERIC_TRACER
+	select TRACER_MAX_TRACE
 	help
 	  In the context of high-performance computing (HPC), the Operating
 	  System Noise (osnoise) refers to the interference experienced by an
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8e842f68b9a5..442438b93fe9 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -163,7 +163,7 @@ static void ftrace_sync_ipi(void *data)
 static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops)
 {
 	/*
-	 * If this is a dynamic, RCU, or per CPU ops, or we force list func,
+	 * If this is a dynamic or RCU ops, or we force list func,
 	 * then it needs to call the list anyway.
 	 */
 	if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_RCU) ||
@@ -2762,6 +2762,19 @@ void __weak ftrace_arch_code_modify_post_process(void)
 {
 }
 
+static int update_ftrace_func(ftrace_func_t func)
+{
+	static ftrace_func_t save_func;
+
+	/* Avoid updating if it hasn't changed */
+	if (func == save_func)
+		return 0;
+
+	save_func = func;
+
+	return ftrace_update_ftrace_func(func);
+}
+
 void ftrace_modify_all_code(int command)
 {
 	int update = command & FTRACE_UPDATE_TRACE_FUNC;
@@ -2782,7 +2795,7 @@ void ftrace_modify_all_code(int command)
 	 * traced.
 	 */
 	if (update) {
-		err = ftrace_update_ftrace_func(ftrace_ops_list_func);
+		err = update_ftrace_func(ftrace_ops_list_func);
 		if (FTRACE_WARN_ON(err))
 			return;
 	}
@@ -2798,7 +2811,7 @@ void ftrace_modify_all_code(int command)
 		/* If irqs are disabled, we are in stop machine */
 		if (!irqs_disabled())
 			smp_call_function(ftrace_sync_ipi, NULL, 1);
-		err = ftrace_update_ftrace_func(ftrace_trace_function);
+		err = update_ftrace_func(ftrace_trace_function);
 		if (FTRACE_WARN_ON(err))
 			return;
 	}
@@ -3070,8 +3083,6 @@ out:
 	/*
 	 * Dynamic ops may be freed, we must make sure that all
 	 * callers are done before leaving this function.
-	 * The same goes for freeing the per_cpu data of the per_cpu
-	 * ops.
 	 */
 	if (ops->flags & FTRACE_OPS_FL_DYNAMIC) {
 		/*
@@ -4192,6 +4203,7 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod)
 			}
 			found = 1;
 		}
+		cond_resched();
 	} while_for_each_ftrace_rec();
  out_unlock:
 	mutex_unlock(&ftrace_lock);
@@ -7518,8 +7530,6 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 		/*
 		 * Check the following for each ops before calling their func:
 		 *  if RCU flag is set, then rcu_is_watching() must be true
-		 *  if PER_CPU is set, then ftrace_function_local_disable()
-		 *                          must be false
 		 *  Otherwise test if the ip matches the ops filter
 		 *
 		 * If any of the above fails then the op->func() is not executed.
@@ -7569,8 +7579,8 @@ NOKPROBE_SYMBOL(arch_ftrace_ops_list_func);
 
 /*
  * If there's only one function registered but it does not support
- * recursion, needs RCU protection and/or requires per cpu handling, then
- * this function will be called by the mcount trampoline.
+ * recursion, needs RCU protection, then this function will be called
+ * by the mcount trampoline.
  */
 static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
 				   struct ftrace_ops *op, struct ftrace_regs *fregs)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b21bf14bae9b..c366a0a9ddba 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2062,8 +2062,10 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct list_head *pages = &cpu_buffer->new_pages;
 	int retries, success;
+	unsigned long flags;
 
-	raw_spin_lock_irq(&cpu_buffer->reader_lock);
+	/* Can be called at early boot up, where interrupts must not been enabled */
+	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	/*
 	 * We are holding the reader lock, so the reader page won't be swapped
 	 * in the ring buffer. Now we are racing with the writer trying to
@@ -2120,7 +2122,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
 	 * tracing
 	 */
 	RB_WARN_ON(cpu_buffer, !success);
-	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	/* free pages if they weren't inserted */
 	if (!success) {
@@ -2248,8 +2250,16 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
 				rb_update_pages(cpu_buffer);
 				cpu_buffer->nr_pages_to_update = 0;
 			} else {
-				schedule_work_on(cpu,
-						&cpu_buffer->update_pages_work);
+				/* Run directly if possible. */
+				migrate_disable();
+				if (cpu != smp_processor_id()) {
+					migrate_enable();
+					schedule_work_on(cpu,
+							 &cpu_buffer->update_pages_work);
+				} else {
+					update_pages_handler(&cpu_buffer->update_pages_work);
+					migrate_enable();
+				}
 			}
 		}
 
@@ -2298,9 +2308,17 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
 		if (!cpu_online(cpu_id))
 			rb_update_pages(cpu_buffer);
 		else {
-			schedule_work_on(cpu_id,
-					 &cpu_buffer->update_pages_work);
-			wait_for_completion(&cpu_buffer->update_done);
+			/* Run directly if possible. */
+			migrate_disable();
+			if (cpu_id == smp_processor_id()) {
+				rb_update_pages(cpu_buffer);
+				migrate_enable();
+			} else {
+				migrate_enable();
+				schedule_work_on(cpu_id,
+						 &cpu_buffer->update_pages_work);
+				wait_for_completion(&cpu_buffer->update_done);
+			}
 		}
 
 		cpu_buffer->nr_pages_to_update = 0;
@@ -3180,8 +3198,7 @@ static inline void rb_event_discard(struct ring_buffer_event *event)
 		event->time_delta = 1;
 }
 
-static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
-		      struct ring_buffer_event *event)
+static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	local_inc(&cpu_buffer->entries);
 	rb_end_commit(cpu_buffer);
@@ -3383,15 +3400,14 @@ void ring_buffer_nest_end(struct trace_buffer *buffer)
  *
  * Must be paired with ring_buffer_lock_reserve.
  */
-int ring_buffer_unlock_commit(struct trace_buffer *buffer,
-			      struct ring_buffer_event *event)
+int ring_buffer_unlock_commit(struct trace_buffer *buffer)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	int cpu = raw_smp_processor_id();
 
 	cpu_buffer = buffer->buffers[cpu];
 
-	rb_commit(cpu_buffer, event);
+	rb_commit(cpu_buffer);
 
 	rb_wakeups(buffer, cpu_buffer);
 
@@ -3977,7 +3993,7 @@ int ring_buffer_write(struct trace_buffer *buffer,
 
 	memcpy(body, data, length);
 
-	rb_commit(cpu_buffer, event);
+	rb_commit(cpu_buffer);
 
 	rb_wakeups(buffer, cpu_buffer);
 
@@ -5998,7 +6014,7 @@ static __init int rb_write_something(struct rb_test_data *data, bool nested)
 	}
 
  out:
-	ring_buffer_unlock_commit(data->buffer, event);
+	ring_buffer_unlock_commit(data->buffer);
 
 	return 0;
 }
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 78e576575b79..aef34673d79d 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -258,7 +258,7 @@ static void ring_buffer_producer(void)
 				hit++;
 				entry = ring_buffer_event_data(event);
 				*entry = smp_processor_id();
-				ring_buffer_unlock_commit(buffer, event);
+				ring_buffer_unlock_commit(buffer);
 			}
 		}
 		end_time = ktime_get();
diff --git a/kernel/trace/rv/monitors/wip/wip.h b/kernel/trace/rv/monitors/wip/wip.h
index dacc37b62a2c..2e373f2c65ed 100644
--- a/kernel/trace/rv/monitors/wip/wip.h
+++ b/kernel/trace/rv/monitors/wip/wip.h
@@ -27,7 +27,7 @@ struct automaton_wip {
 	bool final_states[state_max_wip];
 };
 
-static struct automaton_wip automaton_wip = {
+static const struct automaton_wip automaton_wip = {
 	.state_names = {
 		"preemptive",
 		"non_preemptive"
diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.h b/kernel/trace/rv/monitors/wwnr/wwnr.h
index 118e576b91b4..d0d9c4b8121b 100644
--- a/kernel/trace/rv/monitors/wwnr/wwnr.h
+++ b/kernel/trace/rv/monitors/wwnr/wwnr.h
@@ -27,7 +27,7 @@ struct automaton_wwnr {
 	bool final_states[state_max_wwnr];
 };
 
-static struct automaton_wwnr automaton_wwnr = {
+static const struct automaton_wwnr automaton_wwnr = {
 	.state_names = {
 		"not_running",
 		"running"
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5cfc95a52bc3..a555a861b978 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -19,7 +19,6 @@
 #include <linux/kallsyms.h>
 #include <linux/security.h>
 #include <linux/seq_file.h>
-#include <linux/notifier.h>
 #include <linux/irqflags.h>
 #include <linux/debugfs.h>
 #include <linux/tracefs.h>
@@ -85,7 +84,7 @@ void __init disable_tracing_selftest(const char *reason)
 #endif
 
 /* Pipe tracepoints to printk */
-struct trace_iterator *tracepoint_print_iter;
+static struct trace_iterator *tracepoint_print_iter;
 int tracepoint_printk;
 static bool tracepoint_printk_stop_on_boot __initdata;
 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
@@ -999,7 +998,7 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev
 		/* ring_buffer_unlock_commit() enables preemption */
 		preempt_enable_notrace();
 	} else
-		ring_buffer_unlock_commit(buffer, event);
+		ring_buffer_unlock_commit(buffer);
 }
 
 /**
@@ -1421,6 +1420,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
 	return false;
 }
 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
+#define free_snapshot(tr)	do { } while (0)
 #endif /* CONFIG_TRACER_SNAPSHOT */
 
 void tracer_tracing_off(struct trace_array *tr)
@@ -1692,6 +1692,8 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
 }
 
 unsigned long __read_mostly	tracing_thresh;
+
+#ifdef CONFIG_TRACER_MAX_TRACE
 static const struct file_operations tracing_max_lat_fops;
 
 #ifdef LATENCY_FS_NOTIFY
@@ -1748,18 +1750,14 @@ void latency_fsnotify(struct trace_array *tr)
 	irq_work_queue(&tr->fsnotify_irqwork);
 }
 
-#elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)	\
-	|| defined(CONFIG_OSNOISE_TRACER)
+#else /* !LATENCY_FS_NOTIFY */
 
 #define trace_create_maxlat_file(tr, d_tracer)				\
 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
 			  d_tracer, &tr->max_latency, &tracing_max_lat_fops)
 
-#else
-#define trace_create_maxlat_file(tr, d_tracer)	 do { } while (0)
 #endif
 
-#ifdef CONFIG_TRACER_MAX_TRACE
 /*
  * Copy the new maximum trace into the separate maximum-trace
  * structure. (this way the maximum trace is permanently saved,
@@ -1834,14 +1832,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
 		ring_buffer_record_off(tr->max_buffer.buffer);
 
 #ifdef CONFIG_TRACER_SNAPSHOT
-	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
-		goto out_unlock;
+	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
+		arch_spin_unlock(&tr->max_lock);
+		return;
+	}
 #endif
 
 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
 
 	__update_max_tr(tr, tsk, cpu);
 
- out_unlock:
 	arch_spin_unlock(&tr->max_lock);
 }
 
@@ -1888,6 +1887,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	__update_max_tr(tr, tsk, cpu);
 	arch_spin_unlock(&tr->max_lock);
 }
+
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
 static int wait_on_pipe(struct trace_iterator *iter, int full)
@@ -5617,7 +5617,7 @@ static const char readme_msg[] =
 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
-	"\t           <type>\\[<array-size>\\]\n"
+	"\t           symstr, <type>\\[<array-size>\\]\n"
#ifdef CONFIG_HIST_TRIGGERS
 	"\t    field: <stype> <name>;\n"
 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
@@ -5678,6 +5678,7 @@ static const char readme_msg[] =
 	"\t            [:size=#entries]\n"
 	"\t            [:pause][:continue][:clear]\n"
 	"\t            [:name=histname1]\n"
+	"\t            [:nohitcount]\n"
 	"\t            [:<handler>.<action>]\n"
 	"\t            [if <filter>]\n\n"
 	"\t    Note, special fields can be used as well:\n"
@@ -5724,7 +5725,9 @@ static const char readme_msg[] =
 	"\t            .syscall    display a syscall id as a syscall name\n"
 	"\t            .log2       display log2 value rather than raw number\n"
 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
-	"\t            .usecs      display a common_timestamp in microseconds\n\n"
+	"\t            .usecs      display a common_timestamp in microseconds\n"
+	"\t            .percent    display a number of percentage value\n"
+	"\t            .graph      display a bar-graph of a value\n\n"
 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
 	"\t    trigger or to start a hist trigger but not log any events\n"
 	"\t    until told to do so.  'continue' can be used to start or\n"
@@ -5732,6 +5735,8 @@ static const char readme_msg[] =
 	"\t    The 'clear' parameter will clear the contents of a running\n"
 	"\t    hist trigger and leave its current paused/active state\n"
 	"\t    unchanged.\n\n"
+	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
+	"\t    raw hitcount in the histogram.\n\n"
 	"\t    The enable_hist and disable_hist triggers can be used to\n"
 	"\t    have one event conditionally start and stop another event's\n"
 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
@@ -6572,7 +6577,7 @@ out:
 	return ret;
 }
 
-#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
+#ifdef CONFIG_TRACER_MAX_TRACE
 
 static ssize_t
 tracing_max_lat_read(struct file *filp, char __user *ubuf,
@@ -6796,7 +6801,20 @@ waitagain:
 		ret = print_trace_line(iter);
 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
-			/* don't print partial lines */
+			/*
+			 * If one print_trace_line() fills entire trace_seq in one shot,
+			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
+			 * In this case, we need to consume it, otherwise, loop will peek
+			 * this event next time, resulting in an infinite loop.
+			 */
+			if (save_len == 0) {
+				iter->seq.full = 0;
+				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
+				trace_consume(iter);
+				break;
+			}
+
+			/* In other cases, don't print partial lines */
 			iter->seq.seq.len = save_len;
 			break;
 		}
@@ -7587,7 +7605,7 @@ static const struct file_operations tracing_thresh_fops = {
 	.llseek		= generic_file_llseek,
 };
 
-#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
+#ifdef CONFIG_TRACER_MAX_TRACE
 
 static const struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
@@ -9601,7 +9619,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 
 	create_trace_options_dir(tr);
 
+#ifdef CONFIG_TRACER_MAX_TRACE
 	trace_create_maxlat_file(tr, d_tracer);
+#endif
 
 	if (ftrace_create_function_files(tr, d_tracer))
 		MEM_FAIL(1, "Could not allocate function filter files");
@@ -9855,41 +9875,41 @@ static __init int tracer_init_tracefs(void)
 
 fs_initcall(tracer_init_tracefs);
 
-static int trace_panic_handler(struct notifier_block *this,
-			       unsigned long event, void *unused)
-{
-	if (ftrace_dump_on_oops)
-		ftrace_dump(ftrace_dump_on_oops);
-	return NOTIFY_OK;
-}
+static int trace_die_panic_handler(struct notifier_block *self,
+				unsigned long ev, void *unused);
 
 static struct notifier_block trace_panic_notifier = {
-	.notifier_call  = trace_panic_handler,
-	.next           = NULL,
-	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
+	.notifier_call = trace_die_panic_handler,
+	.priority = INT_MAX - 1,
 };
 
-static int trace_die_handler(struct notifier_block *self,
-			     unsigned long val,
-			     void *data)
-{
-	switch (val) {
-	case DIE_OOPS:
-		if (ftrace_dump_on_oops)
-			ftrace_dump(ftrace_dump_on_oops);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
-
 static struct notifier_block trace_die_notifier = {
-	.notifier_call = trace_die_handler,
-	.priority = 200
+	.notifier_call = trace_die_panic_handler,
+	.priority = INT_MAX - 1,
 };
 
 /*
+ * The idea is to execute the following die/panic callback early, in order
+ * to avoid showing irrelevant information in the trace (like other panic
+ * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
+ * warnings get disabled (to prevent potential log flooding).
+ */
+static int trace_die_panic_handler(struct notifier_block *self,
+				unsigned long ev, void *unused)
+{
+	if (!ftrace_dump_on_oops)
+		return NOTIFY_DONE;
+
+	/* The die notifier requires DIE_OOPS to trigger */
+	if (self == &trace_die_notifier && ev != DIE_OOPS)
+		return NOTIFY_DONE;
+
+	ftrace_dump(ftrace_dump_on_oops);
+
+	return NOTIFY_DONE;
+}
+
+/*
  * printk is set to max of 1024, we really don't need it that big.
  * Nothing should be printing 1000 characters anyway.
  */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d42e24507152..e46a49269be2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -308,8 +308,7 @@ struct trace_array {
 	struct array_buffer	max_buffer;
 	bool			allocated_snapshot;
 #endif
-#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \
-	|| defined(CONFIG_OSNOISE_TRACER)
+#ifdef CONFIG_TRACER_MAX_TRACE
 	unsigned long		max_latency;
 #ifdef CONFIG_FSNOTIFY
 	struct dentry		*d_max_latency;
@@ -615,7 +614,7 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
 bool trace_is_tracepoint_string(const char *str);
 const char *trace_event_format(struct trace_iterator *iter, const char *fmt);
 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
-			 va_list ap);
+			 va_list ap) __printf(2, 0);
 
 int trace_empty(struct trace_iterator *iter);
 
@@ -688,12 +687,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
 		   void *cond_data);
 void update_max_tr_single(struct trace_array *tr,
 			  struct task_struct *tsk, int cpu);
-#endif /* CONFIG_TRACER_MAX_TRACE */
 
-#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \
-	|| defined(CONFIG_OSNOISE_TRACER)) && defined(CONFIG_FSNOTIFY)
+#ifdef CONFIG_FSNOTIFY
 #define LATENCY_FS_NOTIFY
 #endif
+#endif /* CONFIG_TRACER_MAX_TRACE */
 
 #ifdef LATENCY_FS_NOTIFY
 void latency_fsnotify(struct trace_array *tr);
@@ -1942,8 +1940,6 @@ static inline void tracer_hardirqs_on(unsigned long a0, unsigned long a1) { }
 static inline void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { }
 #endif
 
-extern struct trace_iterator *tracepoint_print_iter;
-
 /*
  * Reset the state of the trace_iterator so that it can read consumed data.
 * Normally, the trace_iterator is used for reading the data when it is not
@@ -1956,17 +1952,30 @@ static __always_inline void trace_iterator_reset(struct trace_iterator *iter)
 }
 
 /* Check the name is good for event/group/fields */
-static inline bool is_good_name(const char *name)
+static inline bool __is_good_name(const char *name, bool hash_ok)
 {
-	if (!isalpha(*name) && *name != '_')
+	if (!isalpha(*name) && *name != '_' && (!hash_ok || *name != '-'))
 		return false;
 	while (*++name != '\0') {
-		if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+		if (!isalpha(*name) && !isdigit(*name) && *name != '_' &&
+		    (!hash_ok || *name != '-'))
 			return false;
 	}
 	return true;
 }
 
+/* Check the name is good for event/group/fields */
+static inline bool is_good_name(const char *name)
+{
+	return __is_good_name(name, false);
+}
+
+/* Check the name is good for system */
+static inline bool is_good_system_name(const char *name)
+{
+	return __is_good_name(name, true);
+}
+
 /* Convert certain expected symbols into '_' when generating event names */
 static inline void sanitize_event_name(char *name)
 {
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 61e3a2620fa3..05e791241812 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -251,16 +251,12 @@ int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
 	struct trace_event_call *tp_event;
 
 	if (p_event->attr.kprobe_func) {
-		func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL);
-		if (!func)
-			return -ENOMEM;
-		ret = strncpy_from_user(
-			func, u64_to_user_ptr(p_event->attr.kprobe_func),
-			KSYM_NAME_LEN);
-		if (ret == KSYM_NAME_LEN)
-			ret = -E2BIG;
-		if (ret < 0)
-			goto out;
+		func = strndup_user(u64_to_user_ptr(p_event->attr.kprobe_func),
+				    KSYM_NAME_LEN);
+		if (IS_ERR(func)) {
+			ret = PTR_ERR(func);
+			return (ret == -EINVAL) ? -E2BIG : ret;
+		}
 
 		if (func[0] == '\0') {
 			kfree(func);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f71ea6e79b3c..33e0b4f8ebe6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2796,6 +2796,42 @@ trace_create_new_event(struct trace_event_call *call,
 	return file;
 }
 
+#define MAX_BOOT_TRIGGERS 32
+
+static struct boot_triggers {
+	const char		*event;
+	char			*trigger;
+} bootup_triggers[MAX_BOOT_TRIGGERS];
+
+static char bootup_trigger_buf[COMMAND_LINE_SIZE];
+static int nr_boot_triggers;
+
+static __init int setup_trace_triggers(char *str)
+{
+	char *trigger;
+	char *buf;
+	int i;
+
+	strlcpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE);
+	ring_buffer_expanded = true;
+	disable_tracing_selftest("running event triggers");
+
+	buf = bootup_trigger_buf;
+	for (i = 0; i < MAX_BOOT_TRIGGERS; i++) {
+		trigger = strsep(&buf, ",");
+		if (!trigger)
+			break;
+		bootup_triggers[i].event = strsep(&trigger, ".");
+		bootup_triggers[i].trigger = strsep(&trigger, ".");
+		if (!bootup_triggers[i].trigger)
+			break;
+	}
+
+	nr_boot_triggers = i;
+	return 1;
+}
+__setup("trace_trigger=", setup_trace_triggers);
+
 /* Add an event to a trace directory */
 static int
 __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
@@ -2812,6 +2848,24 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
 		return event_define_fields(call);
 }
 
+static void trace_early_triggers(struct trace_event_file *file, const char *name)
+{
+	int ret;
+	int i;
+
+	for (i = 0; i < nr_boot_triggers; i++) {
+		if (strcmp(name, bootup_triggers[i].event))
+			continue;
+		mutex_lock(&event_mutex);
+		ret = trigger_process_regex(file, bootup_triggers[i].trigger);
+		mutex_unlock(&event_mutex);
+		if (ret)
+			pr_err("Failed to register trigger '%s' on event %s\n",
+			       bootup_triggers[i].trigger,
+			       bootup_triggers[i].event);
+	}
+}
+
 /*
  * Just create a descriptor for early init. A descriptor is required
  * for enabling events at boot. We want to enable events before
@@ -2822,12 +2876,19 @@ __trace_early_add_new_event(struct trace_event_call *call,
 			    struct trace_array *tr)
 {
 	struct trace_event_file *file;
+	int ret;
 
 	file = trace_create_new_event(call, tr);
 	if (!file)
 		return -ENOMEM;
 
-	return event_define_fields(call);
+	ret = event_define_fields(call);
+	if (ret)
+		return ret;
+
+	trace_early_triggers(file, trace_event_name(call));
+
+	return 0;
 }
 
 struct ftrace_module_file_ops;
@@ -3735,6 +3796,8 @@ static __init int event_trace_enable(void)
 			list_add(&call->list, &ftrace_events);
 	}
 
+	register_trigger_cmds();
+
 	/*
 	 * We need the top trace array to have a working set of trace
 	 * points at early init, before the debug files and directories
@@ -3749,7 +3812,6 @@ static __init int event_trace_enable(void)
 
 	register_event_cmds();
 
-	register_trigger_cmds();
 
 	return 0;
 }
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 1c82478e8dff..fcaf226b7744 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -69,7 +69,8 @@
 	C(INVALID_STR_OPERAND,	"String type can not be an operand in expression"), \
 	C(EXPECT_NUMBER,	"Expecting numeric literal"),		\
 	C(UNARY_MINUS_SUBEXPR,	"Unary minus not supported in sub-expressions"), \
-	C(DIVISION_BY_ZERO,	"Division by zero"),
+	C(DIVISION_BY_ZERO,	"Division by zero"),			\
+	C(NEED_NOHC_VAL,	"Non-hitcount value is required for 'nohitcount'"),
 
 #undef C
 #define C(a, b)		HIST_ERR_##a
@@ -506,6 +507,8 @@ enum hist_field_flags {
 	HIST_FIELD_FL_ALIAS		= 1 << 16,
 	HIST_FIELD_FL_BUCKET		= 1 << 17,
 	HIST_FIELD_FL_CONST		= 1 << 18,
+	HIST_FIELD_FL_PERCENT		= 1 << 19,
+	HIST_FIELD_FL_GRAPH		= 1 << 20,
 };
 
 struct var_defs {
@@ -524,6 +527,7 @@ struct hist_trigger_attrs {
 	bool		cont;
 	bool		clear;
 	bool		ts_in_usecs;
+	bool		no_hitcount;
 	unsigned int	map_bits;
 
 	char		*assignment_str[TRACING_MAP_VARS_MAX];
@@ -617,7 +621,7 @@ struct action_data {
 	 * event param, and is passed to the synthetic event
 	 * invocation.
 	 */
-	unsigned int		var_ref_idx[TRACING_MAP_VARS_MAX];
+	unsigned int		var_ref_idx[SYNTH_FIELDS_MAX];
 	struct synth_event	*synth_event;
 	bool			use_trace_keyword;
 	char			*synth_event_name;
@@ -1356,6 +1360,8 @@ static const char *hist_field_name(struct hist_field *field,
 			field_name = field->name;
 	} else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
 		field_name = "common_timestamp";
+	else if (field->flags & HIST_FIELD_FL_HITCOUNT)
+		field_name = "hitcount";
 
 	if (field_name == NULL)
 		field_name = "";
@@ -1546,7 +1552,10 @@ parse_hist_trigger_attrs(struct trace_array *tr, char *trigger_str)
 			ret = parse_assignment(tr, str, attrs);
 			if (ret)
 				goto free;
-		} else if (strcmp(str, "pause") == 0)
+		} else if (strcmp(str, "nohitcount") == 0 ||
+			   strcmp(str, "NOHC") == 0)
+			attrs->no_hitcount = true;
+		else if (strcmp(str, "pause") == 0)
 			attrs->pause = true;
 		else if ((strcmp(str, "cont") == 0) ||
 			 (strcmp(str, "continue") == 0))
@@ -1705,6 +1714,10 @@ static const char *get_hist_field_flags(struct hist_field *hist_field)
 		flags_str = "buckets";
 	else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
 		flags_str = "usecs";
+	else if (hist_field->flags & HIST_FIELD_FL_PERCENT)
+		flags_str = "percent";
+	else if (hist_field->flags & HIST_FIELD_FL_GRAPH)
+		flags_str = "graph";
 
 	return flags_str;
 }
@@ -2173,7 +2186,9 @@ static struct hist_field *create_var_ref(struct hist_trigger_data *hist_data,
 			return ref_field;
 		}
 	}
-
+	/* Sanity check to avoid out-of-bound write on 'hist_data->var_refs' */
+	if (hist_data->n_var_refs >= TRACING_MAP_VARS_MAX)
+		return NULL;
 	ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL);
 	if (ref_field) {
 		if (init_var_ref(ref_field, var_field, system, event_name)) {
@@ -2313,6 +2328,14 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
 			if (ret || !(*buckets))
 				goto error;
 			*flags |= HIST_FIELD_FL_BUCKET;
+		} else if (strncmp(modifier, "percent", 7) == 0) {
+			if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY))
+				goto error;
+			*flags |= HIST_FIELD_FL_PERCENT;
+		} else if (strncmp(modifier, "graph", 5) == 0) {
+			if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY))
+				goto error;
+			*flags |= HIST_FIELD_FL_GRAPH;
 		} else {
  error:
			hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier));
@@ -2328,6 +2351,8 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
 			hist_data->attrs->ts_in_usecs = true;
 	} else if (strcmp(field_name, "common_cpu") == 0)
 		*flags |= HIST_FIELD_FL_CPU;
+	else if (strcmp(field_name, "hitcount") == 0)
+		*flags |= HIST_FIELD_FL_HITCOUNT;
 	else {
 		field = trace_find_event_field(file->event_call, field_name);
 		if (!field || !field->size) {
@@ -3586,6 +3611,7 @@ static int parse_action_params(struct trace_array *tr, char *params,
 	while (params) {
 		if (data->n_params >= SYNTH_FIELDS_MAX) {
 			hist_err(tr, HIST_ERR_TOO_MANY_PARAMS, 0);
+			ret = -EINVAL;
 			goto out;
 		}
 
@@ -3922,6 +3948,10 @@ static int trace_action_create(struct hist_trigger_data *hist_data,
 
 	lockdep_assert_held(&event_mutex);
 
+	/* Sanity check to avoid out-of-bound write on 'data->var_ref_idx' */
+	if (data->n_params > SYNTH_FIELDS_MAX)
+		return -EINVAL;
+
 	if (data->use_trace_keyword)
 		synth_event_name = data->synth_event_name;
 	else
@@ -4328,8 +4358,8 @@ static int create_var_field(struct hist_trigger_data *hist_data,
 static int create_val_fields(struct hist_trigger_data *hist_data,
 			     struct trace_event_file *file)
 {
+	unsigned int i, j = 1, n_hitcount = 0;
 	char *fields_str, *field_str;
-	unsigned int i, j = 1;
 	int ret;
 
 	ret = create_hitcount_val(hist_data);
@@ -4346,8 +4376,10 @@ static int create_val_fields(struct hist_trigger_data *hist_data,
 		if (!field_str)
 			break;
 
-		if (strcmp(field_str, "hitcount") == 0)
-			continue;
+		if (strcmp(field_str, "hitcount") == 0) {
+			if (!n_hitcount++)
+				continue;
+		}
 
 		ret = create_val_field(hist_data, j++, file, field_str);
 		if (ret)
@@ -4357,6 +4389,12 @@ static int create_val_fields(struct hist_trigger_data *hist_data,
 	if (fields_str && (strcmp(fields_str, "hitcount") != 0))
 		ret = -EINVAL;
  out:
+	/* There is only raw hitcount but nohitcount suppresses it. */
+	if (j == 1 && hist_data->attrs->no_hitcount) {
+		hist_err(hist_data->event_file->tr, HIST_ERR_NEED_NOHC_VAL, 0);
+		ret = -ENOENT;
+	}
+
 	return ret;
 }
 
@@ -5285,33 +5323,101 @@ static void hist_trigger_print_key(struct seq_file *m,
 	seq_puts(m, "}");
 }
 
+/* Get the 100 times of the percentage of @val in @total */
+static inline unsigned int __get_percentage(u64 val, u64 total)
+{
+	if (!total)
+		goto div0;
+
+	if (val < (U64_MAX / 10000))
+		return (unsigned int)div64_ul(val * 10000, total);
+
+	total = div64_u64(total, 10000);
+	if (!total)
+		goto div0;
+
+	return (unsigned int)div64_ul(val, total);
+div0:
+	return val ? UINT_MAX : 0;
+}
+
+#define BAR_CHAR '#'
+
+static inline const char *__fill_bar_str(char *buf, int size, u64 val, u64 max)
+{
+	unsigned int len = __get_percentage(val, max);
+	int i;
+
+	if (len == UINT_MAX) {
+		snprintf(buf, size, "[ERROR]");
+		return buf;
+	}
+
+	len = len * size / 10000;
+	for (i = 0; i < len && i < size; i++)
+		buf[i] = BAR_CHAR;
+	while (i < size)
+		buf[i++] = ' ';
+	buf[size] = '\0';
+
+	return buf;
+}
+
+struct hist_val_stat {
+	u64 max;
+	u64 total;
+};
+
+static void hist_trigger_print_val(struct seq_file *m, unsigned int idx,
+				   const char *field_name, unsigned long flags,
+				   struct hist_val_stat *stats,
+				   struct tracing_map_elt *elt)
+{
+	u64 val = tracing_map_read_sum(elt, idx);
+	unsigned int pc;
+	char bar[21];
+
+	if (flags & HIST_FIELD_FL_PERCENT) {
+		pc = __get_percentage(val, stats[idx].total);
+		if (pc == UINT_MAX)
+			seq_printf(m, " %s (%%):[ERROR]", field_name);
+		else
+			seq_printf(m, " %s (%%): %3u.%02u", field_name,
+					pc / 100, pc % 100);
+	} else if (flags & HIST_FIELD_FL_GRAPH) {
+		seq_printf(m, " %s: %20s", field_name,
+			   __fill_bar_str(bar, 20, val, stats[idx].max));
+	} else if (flags & HIST_FIELD_FL_HEX) {
+		seq_printf(m, " %s: %10llx", field_name, val);
+	} else {
+		seq_printf(m, " %s: %10llu", field_name, val);
+	}
+}
+
 static void hist_trigger_entry_print(struct seq_file *m,
 				     struct hist_trigger_data *hist_data,
+				     struct hist_val_stat *stats,
 				     void *key,
 				     struct tracing_map_elt *elt)
 {
 	const char *field_name;
-	unsigned int i;
+	unsigned int i = HITCOUNT_IDX;
+	unsigned long flags;
 
 	hist_trigger_print_key(m, hist_data, key, elt);
 
-	seq_printf(m, " hitcount: %10llu",
-		   tracing_map_read_sum(elt, HITCOUNT_IDX));
+	/* At first, show the raw hitcount if !nohitcount */
+	if (!hist_data->attrs->no_hitcount)
+		hist_trigger_print_val(m, i, "hitcount", 0, stats, elt);
 
 	for (i = 1; i < hist_data->n_vals; i++) {
 		field_name = hist_field_name(hist_data->fields[i], 0);
-
-		if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR ||
-		    hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR)
+		flags = hist_data->fields[i]->flags;
+		if (flags & HIST_FIELD_FL_VAR || flags & HIST_FIELD_FL_EXPR)
 			continue;
 
-		if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
-			seq_printf(m, "  %s: %10llx", field_name,
-				   tracing_map_read_sum(elt, i));
-		} else {
-			seq_printf(m, "  %s: %10llu", field_name,
-				   tracing_map_read_sum(elt, i));
-		}
+		seq_puts(m, " ");
+		hist_trigger_print_val(m, i, field_name, flags, stats, elt);
 	}
 
 	print_actions(m, hist_data, elt);
@@ -5324,7 +5430,9 @@ static int print_entries(struct seq_file *m,
 {
 	struct tracing_map_sort_entry **sort_entries = NULL;
 	struct tracing_map *map = hist_data->map;
-	int i, n_entries;
+	int i, j, n_entries;
+	struct hist_val_stat *stats = NULL;
+	u64 val;
 
 	n_entries = tracing_map_sort_entries(map, hist_data->sort_keys,
 					     hist_data->n_sort_keys,
@@ -5332,11 +5440,34 @@ static int print_entries(struct seq_file *m,
 	if (n_entries < 0)
 		return n_entries;
 
+	/* Calculate the max and the total for each field if needed. */
+	for (j = 0; j < hist_data->n_vals; j++) {
+		if (!(hist_data->fields[j]->flags &
+			(HIST_FIELD_FL_PERCENT | HIST_FIELD_FL_GRAPH)))
+			continue;
+		if (!stats) {
+			stats = kcalloc(hist_data->n_vals, sizeof(*stats),
+				       GFP_KERNEL);
+			if (!stats) {
+				n_entries = -ENOMEM;
+				goto out;
+			}
+		}
+		for (i = 0; i < n_entries; i++) {
+			val = tracing_map_read_sum(sort_entries[i]->elt, j);
+			stats[j].total += val;
+			if (stats[j].max < val)
+				stats[j].max = val;
+		}
+	}
+
 	for (i = 0; i < n_entries; i++)
-		hist_trigger_entry_print(m, hist_data,
+		hist_trigger_entry_print(m, hist_data, stats,
 					 sort_entries[i]->key,
 					 sort_entries[i]->elt);
 
+	kfree(stats);
+out:
 	tracing_map_destroy_sort_entries(sort_entries, n_entries);
 
 	return n_entries;
@@ -5726,6 +5857,7 @@ static int event_hist_trigger_print(struct seq_file *m,
 	struct hist_trigger_data *hist_data = data->private_data;
 	struct hist_field *field;
 	bool have_var = false;
+	bool show_val = false;
 	unsigned int i;
 
 	seq_puts(m, HIST_PREFIX);
@@ -5756,12 +5888,16 @@ static int event_hist_trigger_print(struct seq_file *m,
 			continue;
 		}
 
-		if (i == HITCOUNT_IDX)
+		if (i == HITCOUNT_IDX) {
+			if (hist_data->attrs->no_hitcount)
+				continue;
 			seq_puts(m, "hitcount");
-		else {
-			seq_puts(m, ",");
+		} else {
+			if (show_val)
+				seq_puts(m, ",");
 			hist_field_print(m, field);
 		}
+		show_val = true;
 	}
 
 	if (have_var) {
@@ -5812,6 +5948,8 @@ static int event_hist_trigger_print(struct seq_file *m,
 	seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits));
 	if (hist_data->enable_timestamps)
 		seq_printf(m, ":clock=%s", hist_data->attrs->clock);
+	if (hist_data->attrs->no_hitcount)
+		seq_puts(m, ":nohitcount");
 
 	print_actions_spec(m, hist_data);
 
@@ -6438,7 +6576,7 @@ enable:
 	if (se)
 		se->ref++;
  out:
-	if (ret == 0)
+	if (ret == 0 && glob[0])
 		hist_err_clear();
 
 	return ret;
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index c3b582d19b62..67592eed0be8 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1282,12 +1282,12 @@ static int __create_synth_event(const char *name, const char *raw_fields)
 				goto err_free_arg;
 			}
 
-			fields[n_fields++] = field;
 			if (n_fields == SYNTH_FIELDS_MAX) {
 				synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0);
 				ret = -EINVAL;
 				goto err_free_arg;
 			}
+			fields[n_fields++] = field;
 
 			n_fields_this_loop++;
 		}
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 918730d74932..e535959939d3 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1067,7 +1067,14 @@ int set_trigger_filter(char *filter_str,
 
 	/* The filter is for the 'trigger' event, not the triggered event */
 	ret = create_event_filter(file->tr, file->event_call,
-				  filter_str, false, &filter);
+				  filter_str, true, &filter);
+
+	/* Only enabled set_str for error handling */
+	if (filter) {
+		kfree(filter->filter_string);
+		filter->filter_string = NULL;
+	}
+
 	/*
 	 * If create_event_filter() fails, filter still needs to be freed.
 	 * Which the calling code will do with data->filter.
@@ -1078,8 +1085,14 @@ int set_trigger_filter(char *filter_str,
 	rcu_assign_pointer(data->filter, filter);
 
 	if (tmp) {
-		/* Make sure the call is done with the filter */
-		tracepoint_synchronize_unregister();
+		/*
+		 * Make sure the call is done with the filter.
+		 * It is possible that a filter could fail at boot up,
+		 * and then this path will be called. Avoid the synchronization
+		 * in that case.
+		 */
+		if (system_state != SYSTEM_BOOTING)
+			tracepoint_synchronize_unregister();
 		free_event_filter(tmp);
 	}
 
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index a93ed1c49b08..908e8a13c675 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -1359,6 +1359,7 @@ put_user_lock:
 put_user:
 	user_event_destroy_fields(user);
 	user_event_destroy_validators(user);
+	kfree(user->call.print_fmt);
 	kfree(user);
 	return ret;
 }
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5a75b039e586..ee77c8203bd5 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1344,7 +1344,6 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
 		return;
 
 	fbuffer.regs = regs;
-	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
 	entry->ip = (unsigned long)tk->rp.kp.addr;
 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
 
@@ -1385,7 +1384,6 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 		return;
 
 	fbuffer.regs = regs;
-	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
 	entry->func = (unsigned long)tk->rp.kp.addr;
 	entry->ret_ip = get_kretprobe_retaddr(ri);
 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 4300c5dc4e5d..94c1b5eb1dc0 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -49,6 +49,28 @@
 #define DEFAULT_TIMERLAT_PRIO	95			/* FIFO 95 */
 
 /*
+ * osnoise/options entries.
+ */
+enum osnoise_options_index {
+	OSN_DEFAULTS = 0,
+	OSN_WORKLOAD,
+	OSN_PANIC_ON_STOP,
+	OSN_PREEMPT_DISABLE,
+	OSN_IRQ_DISABLE,
+	OSN_MAX
+};
+
+static const char * const osnoise_options_str[OSN_MAX] = {
+							"DEFAULTS",
+							"OSNOISE_WORKLOAD",
+							"PANIC_ON_STOP",
+							"OSNOISE_PREEMPT_DISABLE",
+							"OSNOISE_IRQ_DISABLE" };
+
+#define OSN_DEFAULT_OPTIONS		0x2
+static unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
+
+/*
  * trace_array of the enabled osnoise/timerlat instances.
  */
 struct osnoise_instance {
@@ -1173,11 +1195,12 @@ trace_sched_switch_callback(void *data, bool preempt,
 			    unsigned int prev_state)
 {
 	struct osnoise_variables *osn_var = this_cpu_osn_var();
+	int workload = test_bit(OSN_WORKLOAD, &osnoise_options);
 
-	if (p->pid != osn_var->pid)
+	if ((p->pid != osn_var->pid) || !workload)
 		thread_exit(osn_var, p);
 
-	if (n->pid != osn_var->pid)
+	if ((n->pid != osn_var->pid) || !workload)
 		thread_entry(osn_var, n);
 }
 
@@ -1255,6 +1278,9 @@ static __always_inline void osnoise_stop_tracing(void)
 		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
 				"stop tracing hit on cpu %d\n", smp_processor_id());
 
+		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
+			panic("tracer hit stop condition on CPU %d\n", smp_processor_id());
+
 		tracer_tracing_off(tr);
 	}
 	rcu_read_unlock();
@@ -1289,12 +1315,14 @@ static void notify_new_max_latency(u64 latency)
  */
 static int run_osnoise(void)
 {
+	bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
 	struct osnoise_variables *osn_var = this_cpu_osn_var();
 	u64 start, sample, last_sample;
 	u64 last_int_count, int_count;
 	s64 noise = 0, max_noise = 0;
 	s64 total, last_total = 0;
 	struct osnoise_sample s;
+	bool disable_preemption;
 	unsigned int threshold;
 	u64 runtime, stop_in;
 	u64 sum_noise = 0;
@@ -1302,6 +1330,12 @@ static int run_osnoise(void)
 	int ret = -1;
 
 	/*
+	 * Disabling preemption is only required if IRQs are enabled,
+	 * and the options is set on.
+	 */
+	disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);
+
+	/*
 	 * Considers the current thread as the workload.
 	 */
 	osn_var->pid = current->pid;
@@ -1317,6 +1351,15 @@ static int run_osnoise(void)
 	threshold = tracing_thresh ? : 5000;
 
 	/*
+	 * Apply PREEMPT and IRQ disabled options.
+	 */
+	if (disable_irq)
+		local_irq_disable();
+
+	if (disable_preemption)
+		preempt_disable();
+
+	/*
 	 * Make sure NMIs see sampling first
 	 */
 	osn_var->sampling = true;
@@ -1403,16 +1446,21 @@ static int run_osnoise(void)
 		 * cond_resched()
 		 */
 		if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
-			local_irq_disable();
+			if (!disable_irq)
+				local_irq_disable();
+
 			rcu_momentary_dyntick_idle();
-			local_irq_enable();
+
+			if (!disable_irq)
+				local_irq_enable();
 		}
 
 		/*
 		 * For the non-preemptive kernel config: let threads runs, if
-		 * they so wish.
+		 * they so wish, unless set not do to so.
 		 */
-		cond_resched();
+		if (!disable_irq && !disable_preemption)
+			cond_resched();
 
 		last_sample = sample;
 		last_int_count = int_count;
@@ -1432,6 +1480,15 @@ static int run_osnoise(void)
 	barrier();
 
 	/*
+	 * Return to the preemptive state.
+	 */
+	if (disable_preemption)
+		preempt_enable();
+
+	if (disable_irq)
+		local_irq_enable();
+
+	/*
 	 * Save noise info.
 	 */
 	s.noise = time_to_us(sum_noise);
@@ -1710,9 +1767,16 @@ static void stop_kthread(unsigned int cpu)
 	struct task_struct *kthread;
 
 	kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
-	if (kthread)
+	if (kthread) {
 		kthread_stop(kthread);
-	per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
+		per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
+	} else {
+		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
+			per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
+			barrier();
+			return;
+		}
+	}
 }
 
 /*
@@ -1746,6 +1810,13 @@ static int start_kthread(unsigned int cpu)
 		snprintf(comm, 24, "timerlat/%d", cpu);
 		main = timerlat_main;
 	} else {
+		/* if no workload, just return */
+		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
+			per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
+			barrier();
+			return 0;
+		}
+
 		snprintf(comm, 24, "osnoise/%d", cpu);
 	}
 
@@ -1861,6 +1932,150 @@ static void osnoise_init_hotplug_support(void)
 #endif /* CONFIG_HOTPLUG_CPU */
 
 /*
+ * seq file functions for the osnoise/options file.
+ */
+static void *s_options_start(struct seq_file *s, loff_t *pos)
+{
+	int option = *pos;
+
+	mutex_lock(&interface_lock);
+
+	if (option >= OSN_MAX)
+		return NULL;
+
+	return pos;
+}
+
+static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	int option = ++(*pos);
+
+	if (option >= OSN_MAX)
+		return NULL;
+
+	return pos;
+}
+
+static int s_options_show(struct seq_file *s, void *v)
+{
+	loff_t *pos = v;
+	int option = *pos;
+
+	if (option == OSN_DEFAULTS) {
+		if (osnoise_options == OSN_DEFAULT_OPTIONS)
+			seq_printf(s, "%s", osnoise_options_str[option]);
+		else
+			seq_printf(s, "NO_%s", osnoise_options_str[option]);
+		goto out;
+	}
+
+	if (test_bit(option, &osnoise_options))
+		seq_printf(s, "%s", osnoise_options_str[option]);
+	else
+		seq_printf(s, "NO_%s", osnoise_options_str[option]);
+
+out:
+	if (option != OSN_MAX)
+		seq_puts(s, " ");
+
+	return 0;
+}
+
+static void s_options_stop(struct seq_file *s, void *v)
+{
+	seq_puts(s, "\n");
+	mutex_unlock(&interface_lock);
+}
+
+static const struct seq_operations osnoise_options_seq_ops = {
+	.start		= s_options_start,
+	.next		= s_options_next,
+	.show		= s_options_show,
+	.stop		= s_options_stop
+};
+
+static int osnoise_options_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &osnoise_options_seq_ops);
+};
+
+/**
+ * osnoise_options_write - Write function for "options" entry
+ * @filp: The active open file structure
+ * @ubuf: The user buffer that contains the value to write
+ * @cnt: The maximum number of bytes to write to "file"
+ * @ppos: The current position in @file
+ *
+ * Writing the option name sets the option, writing the "NO_"
+ * prefix in front of the option name disables it.
+ *
+ * Writing "DEFAULTS" resets the option values to the default ones.
+ */
+static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
+				     size_t cnt, loff_t *ppos)
+{
+	int running, option, enable, retval;
+	char buf[256], *option_str;
+
+	if (cnt >= 256)
+		return -EINVAL;
+
+	if (copy_from_user(buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	if (strncmp(buf, "NO_", 3)) {
+		option_str = strstrip(buf);
+		enable = true;
+	} else {
+		option_str = strstrip(&buf[3]);
+		enable = false;
+	}
+
+	option = match_string(osnoise_options_str, OSN_MAX, option_str);
+	if (option < 0)
+		return -EINVAL;
+
+	/*
+	 * trace_types_lock is taken to avoid concurrency on start/stop.
+	 */
+	mutex_lock(&trace_types_lock);
+	running = osnoise_has_registered_instances();
+	if (running)
+		stop_per_cpu_kthreads();
+
+	mutex_lock(&interface_lock);
+	/*
+	 * avoid CPU hotplug operations that might read options.
+	 */
+	cpus_read_lock();
+
+	retval = cnt;
+
+	if (enable) {
+		if (option == OSN_DEFAULTS)
+			osnoise_options = OSN_DEFAULT_OPTIONS;
+		else
+			set_bit(option, &osnoise_options);
+	} else {
+		if (option == OSN_DEFAULTS)
+			retval = -EINVAL;
+		else
+			clear_bit(option, &osnoise_options);
+	}
+
+	cpus_read_unlock();
+	mutex_unlock(&interface_lock);
+
+	if (running)
+		start_per_cpu_kthreads();
+	mutex_unlock(&trace_types_lock);
+
+	return retval;
+}
+
+/*
  * osnoise_cpus_read - Read function for reading the "cpus" file
 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer to read value into
@@ -2042,6 +2257,14 @@ static const struct file_operations cpus_fops = {
 	.llseek		= generic_file_llseek,
 };
 
+static const struct file_operations osnoise_options_fops = {
+	.open		= osnoise_options_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+	.write		= osnoise_options_write
+};
+
 #ifdef CONFIG_TIMERLAT_TRACER
 #ifdef CONFIG_STACKTRACE
 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
@@ -2128,6 +2351,11 @@ static int init_tracefs(void)
 	if (!tmp)
 		goto err;
 
+	tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL,
+				&osnoise_options_fops);
+	if (!tmp)
+		goto err;
+
 	ret = init_timerlat_tracefs(top_dir);
 	if (ret)
 		goto err;
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 67f47ea27921..57a13b61f186 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -11,6 +11,7 @@
 #include <linux/kprobes.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/mm.h>
+#include <linux/idr.h>
 
 #include "trace_output.h"
 
@@ -21,8 +22,6 @@ DECLARE_RWSEM(trace_event_sem);
 
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
-static int next_event_type = __TRACE_LAST_TYPE;
-
 enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
@@ -323,8 +322,9 @@ void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...)
 }
 EXPORT_SYMBOL(trace_event_printf);
 
-static int trace_output_raw(struct trace_iterator *iter, char *name,
-			    char *fmt, va_list ap)
+static __printf(3, 0)
+int trace_output_raw(struct trace_iterator *iter, char *name,
+		     char *fmt, va_list ap)
 {
 	struct trace_seq *s = &iter->seq;
 
@@ -688,38 +688,23 @@ struct trace_event *ftrace_find_event(int type)
 	return NULL;
 }
 
-static LIST_HEAD(ftrace_event_list);
+static DEFINE_IDA(trace_event_ida);
 
-static int trace_search_list(struct list_head **list)
+static void free_trace_event_type(int type)
 {
-	struct trace_event *e = NULL, *iter;
-	int next = __TRACE_LAST_TYPE;
-
-	if (list_empty(&ftrace_event_list)) {
-		*list = &ftrace_event_list;
-		return next;
-	}
+	if (type >= __TRACE_LAST_TYPE)
+		ida_free(&trace_event_ida, type);
+}
 
-	/*
-	 * We used up all possible max events,
-	 * lets see if somebody freed one.
-	 */
-	list_for_each_entry(iter, &ftrace_event_list, list) {
-		if (iter->type != next) {
-			e = iter;
-			break;
-		}
-		next++;
-	}
+static int alloc_trace_event_type(void)
+{
+	int next;
 
-	/* Did we used up all 65 thousand events??? */
-	if (next > TRACE_EVENT_TYPE_MAX)
+	/* Skip static defined type numbers */
+	next = ida_alloc_range(&trace_event_ida, __TRACE_LAST_TYPE,
+			       TRACE_EVENT_TYPE_MAX, GFP_KERNEL);
+	if (next < 0)
 		return 0;
-
-	if (e)
-		*list = &e->list;
-	else
-		*list = &ftrace_event_list;
 
 	return next;
 }
@@ -761,28 +746,10 @@ int register_trace_event(struct trace_event *event)
 	if (WARN_ON(!event->funcs))
 		goto out;
 
-	INIT_LIST_HEAD(&event->list);
-
 	if (!event->type) {
-		struct list_head *list = NULL;
-
-		if (next_event_type > TRACE_EVENT_TYPE_MAX) {
-
-			event->type = trace_search_list(&list);
-			if (!event->type)
-				goto out;
-
-		} else {
-
-			event->type = next_event_type++;
-			list = &ftrace_event_list;
-		}
-
-		if (WARN_ON(ftrace_find_event(event->type)))
+		event->type = alloc_trace_event_type();
+		if (!event->type)
 			goto out;
-
-		list_add_tail(&event->list, list);
-
 	} else if (WARN(event->type > __TRACE_LAST_TYPE,
 			"Need to add type to trace.h")) {
 		goto out;
@@ -819,7 +786,7 @@ EXPORT_SYMBOL_GPL(register_trace_event);
 int __unregister_trace_event(struct trace_event *event)
 {
 	hlist_del(&event->node);
-	list_del(&event->list);
+	free_trace_event_type(event->type);
 	return 0;
 }
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 36dff277de46..01ebabbbe8c9 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -76,9 +76,11 @@ const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
 /* Fetch type information table */
 static const struct fetch_type probe_fetch_types[] = {
 	/* Special types */
-	__ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1,
+	__ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1, 1,
 			    "__data_loc char[]"),
-	__ASSIGN_FETCH_TYPE("ustring", string, string, sizeof(u32), 1,
+	__ASSIGN_FETCH_TYPE("ustring", string, string, sizeof(u32), 1, 1,
+			    "__data_loc char[]"),
+	__ASSIGN_FETCH_TYPE("symstr", string, string, sizeof(u32), 1, 1,
 			    "__data_loc char[]"),
 	/* Basic types */
 	ASSIGN_FETCH_TYPE(u8,  u8,  0),
@@ -98,10 +100,15 @@ static const struct fetch_type probe_fetch_types[] = {
 	ASSIGN_FETCH_TYPE_END
 };
 
-static const struct fetch_type *find_fetch_type(const char *type)
+static const struct fetch_type *find_fetch_type(const char *type, unsigned long flags)
 {
 	int i;
 
+	/* Reject the symbol/symstr for uprobes */
+	if (type && (flags & TPARG_FL_USER) &&
+	    (!strcmp(type, "symbol") || !strcmp(type, "symstr")))
+		return NULL;
+
 	if (!type)
 		type = DEFAULT_FETCH_TYPE_STR;
 
@@ -119,13 +126,13 @@ static const struct fetch_type *find_fetch_type(const char *type)
 		switch (bs) {
 		case 8:
-			return find_fetch_type("u8");
+			return find_fetch_type("u8", flags);
 		case 16:
-			return find_fetch_type("u16");
+			return find_fetch_type("u16", flags);
 		case 32:
-			return find_fetch_type("u32");
+			return find_fetch_type("u32", flags);
 		case 64:
-			return find_fetch_type("u64");
+			return find_fetch_type("u64", flags);
 		default:
 			goto fail;
 		}
@@ -246,7 +253,7 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
 			return -EINVAL;
 		}
 		strlcpy(buf, event, slash - event + 1);
-		if (!is_good_name(buf)) {
+		if (!is_good_system_name(buf)) {
 			trace_probe_log_err(offset, BAD_GROUP_NAME);
 			return -EINVAL;
 		}
@@ -478,7 +485,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
 					    DEREF_OPEN_BRACE);
 			return -EINVAL;
 		} else {
-			const struct fetch_type *t2 = find_fetch_type(NULL);
+			const struct fetch_type *t2 = find_fetch_type(NULL, flags);
 
 			*tmp = '\0';
 			ret = parse_probe_arg(arg, t2, &code, end, flags, offs);
@@ -630,9 +637,9 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
 		/* The type of $comm must be "string", and not an array. */
 		if (parg->count || (t && strcmp(t, "string")))
 			goto out;
-		parg->type = find_fetch_type("string");
+		parg->type = find_fetch_type("string", flags);
 	} else
-		parg->type = find_fetch_type(t);
+		parg->type = find_fetch_type(t, flags);
 	if (!parg->type) {
 		trace_probe_log_err(offset + (t ? (t - arg) : 0), BAD_TYPE);
 		goto out;
@@ -662,16 +669,26 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
 
 	ret = -EINVAL;
 	/* Store operation */
-	if (!strcmp(parg->type->name, "string") ||
-	    !strcmp(parg->type->name, "ustring")) {
-		if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF &&
-		    code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM &&
-		    code->op != FETCH_OP_DATA && code->op != FETCH_OP_TP_ARG) {
-			trace_probe_log_err(offset + (t ? (t - arg) : 0),
-					    BAD_STRING);
-			goto fail;
+	if (parg->type->is_string) {
+		if (!strcmp(parg->type->name, "symstr")) {
+			if (code->op != FETCH_OP_REG && code->op != FETCH_OP_STACK &&
+			    code->op != FETCH_OP_RETVAL && code->op != FETCH_OP_ARG &&
+			    code->op != FETCH_OP_DEREF && code->op != FETCH_OP_TP_ARG) {
+				trace_probe_log_err(offset + (t ? (t - arg) : 0),
+						    BAD_SYMSTRING);
+				goto fail;
+			}
+		} else {
+			if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF &&
+			    code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM &&
+			    code->op != FETCH_OP_DATA && code->op != FETCH_OP_TP_ARG) {
+				trace_probe_log_err(offset + (t ? (t - arg) : 0),
+						    BAD_STRING);
+				goto fail;
+			}
 		}
-		if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM ||
+		if (!strcmp(parg->type->name, "symstr") ||
+		    (code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM ||
 		     code->op == FETCH_OP_DATA) || code->op == FETCH_OP_TP_ARG ||
 		     parg->count) {
 			/*
@@ -679,6 +696,8 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
 			 * must be kept, and if parg->count != 0, this is an
 			 * array of string pointers instead of string address
 			 * itself.
+			 * For the symstr, it doesn't need to dereference, thus
+			 * it just get the value.
 			 */
 			code++;
 			if (code->op != FETCH_OP_NOP) {
@@ -690,6 +709,8 @@ static int traceprobe_parse_probe_arg_body(const char *argv, ssize_t *size,
 		if (!strcmp(parg->type->name, "ustring") ||
 		    code->op == FETCH_OP_UDEREF)
 			code->op = FETCH_OP_ST_USTRING;
+		else if (!strcmp(parg->type->name, "symstr"))
+			code->op = FETCH_OP_ST_SYMSTR;
 		else
 			code->op = FETCH_OP_ST_STRING;
 		code->size = parg->type->size;
@@ -919,8 +940,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
 	for (i = 0; i < tp->nr_args; i++) {
 		parg = tp->args + i;
 		if (parg->count) {
-			if ((strcmp(parg->type->name, "string") == 0) ||
-			    (strcmp(parg->type->name, "ustring") == 0))
+			if (parg->type->is_string)
 				fmt = ", __get_str(%s[%d])";
 			else
 				fmt = ", REC->%s[%d]";
@@ -928,8 +948,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
 				pos += snprintf(buf + pos, LEN_OR_ZERO,
 						fmt, parg->name, j);
 		} else {
-			if ((strcmp(parg->type->name, "string") == 0) ||
-			    (strcmp(parg->type->name, "ustring") == 0))
+			if (parg->type->is_string)
 				fmt = ", __get_str(%s)";
 			else
 				fmt = ", REC->%s";
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index de38f1c03776..23acfd1c3812 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -98,6 +98,7 @@ enum fetch_op {
 	FETCH_OP_ST_UMEM,	/* Mem: .offset, .size */
 	FETCH_OP_ST_STRING,	/* String: .offset, .size */
 	FETCH_OP_ST_USTRING,	/* User String: .offset, .size */
+	FETCH_OP_ST_SYMSTR,	/* Kernel Symbol String: .offset, .size */
 	// Stage 4 (modify) op
 	FETCH_OP_MOD_BF,	/* Bitfield: .basesize, .lshift, .rshift */
 	// Stage 5 (loop) op
@@ -133,7 +134,8 @@ struct fetch_insn {
 struct fetch_type {
 	const char		*name;		/* Name of type */
 	size_t			size;		/* Byte size of type */
-	int			is_signed;	/* Signed flag */
+	bool			is_signed;	/* Signed flag */
+	bool			is_string;	/* String flag */
 	print_type_func_t	print;		/* Print functions */
 	const char		*fmt;		/* Format string */
 	const char		*fmttype;	/* Name in format file */
@@ -177,16 +179,19 @@ DECLARE_BASIC_PRINT_TYPE_FUNC(symbol);
 #define _ADDR_FETCH_TYPE(t) __ADDR_FETCH_TYPE(t)
 #define ADDR_FETCH_TYPE _ADDR_FETCH_TYPE(BITS_PER_LONG)
 
-#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype)	\
-	{.name = _name,				\
+#define __ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, str, _fmttype)	\
+	{.name = _name,					\
 	 .size = _size,					\
-	 .is_signed = sign,				\
+	 .is_signed = (bool)sign,			\
+	 .is_string = (bool)str,			\
 	 .print = PRINT_TYPE_FUNC_NAME(ptype),		\
 	 .fmt = PRINT_TYPE_FMT_NAME(ptype),		\
 	 .fmttype = _fmttype,				\
 	}
+
+/* Non string types can use these macros */
 #define _ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, _fmttype)	\
-	__ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, #_fmttype)
+	__ASSIGN_FETCH_TYPE(_name, ptype, ftype, _size, sign, 0, #_fmttype)
 #define ASSIGN_FETCH_TYPE(ptype, ftype, sign)			\
 	_ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, ptype)
 
@@ -353,7 +358,8 @@ int trace_probe_create(const char *raw_command, int (*createfn)(int, const char
 #define TPARG_FL_KERNEL BIT(1)
 #define TPARG_FL_FENTRY BIT(2)
 #define TPARG_FL_TPOINT BIT(3)
-#define TPARG_FL_MASK	GENMASK(3, 0)
+#define TPARG_FL_USER   BIT(4)
+#define TPARG_FL_MASK	GENMASK(4, 0)
 
 extern int traceprobe_parse_probe_arg(struct trace_probe *tp, int i,
 				const char *argv, unsigned int flags);
@@ -431,6 +437,7 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call,
 	C(ARRAY_TOO_BIG,	"Array number is too big"),		\
 	C(BAD_TYPE,		"Unknown type is specified"),		\
 	C(BAD_STRING,		"String accepts only memory argument"),	\
+	C(BAD_SYMSTRING,	"Symbol String doesn't accept data/userdata"),	\
 	C(BAD_BITFIELD,		"Invalid bitfield"),			\
 	C(ARG_NAME_TOO_LONG,	"Argument name is too long"),		\
 	C(NO_ARG_NAME,		"Argument name is not specified"),	\
diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h
index b3bdb8ddb862..5cea672243f6 100644
--- a/kernel/trace/trace_probe_tmpl.h
+++ b/kernel/trace/trace_probe_tmpl.h
@@ -67,6 +67,37 @@ probe_mem_read(void *dest, void *src, size_t size);
 static nokprobe_inline int
 probe_mem_read_user(void *dest, void *src, size_t size);
 
+static nokprobe_inline int
+fetch_store_symstrlen(unsigned long addr)
+{
+	char namebuf[KSYM_SYMBOL_LEN];
+	int ret;
+
+	ret = sprint_symbol(namebuf, addr);
+	if (ret < 0)
+		return 0;
+
+	return ret + 1;
+}
+
+/*
+ * Fetch a null-terminated symbol string + offset. Caller MUST set *(u32 *)buf
+ * with max length and relative data location.
+ */
+static nokprobe_inline int
+fetch_store_symstring(unsigned long addr, void *dest, void *base)
+{
+	int maxlen = get_loc_len(*(u32 *)dest);
+	void *__dest;
+
+	if (unlikely(!maxlen))
+		return -ENOMEM;
+
+	__dest = get_loc_data(dest, base);
+
+	return sprint_symbol(__dest, addr);
+}
+
 /* From the 2nd stage, routine is same */
 static nokprobe_inline int
 process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
@@ -99,16 +130,22 @@ stage2:
 stage3:
 	/* 3rd stage: store value to buffer */
 	if (unlikely(!dest)) {
-		if (code->op == FETCH_OP_ST_STRING) {
+		switch (code->op) {
+		case FETCH_OP_ST_STRING:
 			ret = fetch_store_strlen(val + code->offset);
 			code++;
 			goto array;
-		} else if (code->op == FETCH_OP_ST_USTRING) {
+		case FETCH_OP_ST_USTRING:
 			ret += fetch_store_strlen_user(val + code->offset);
 			code++;
 			goto array;
-		} else
+		case FETCH_OP_ST_SYMSTR:
+			ret += fetch_store_symstrlen(val + code->offset);
+			code++;
+			goto array;
+		default:
 			return -EILSEQ;
+		}
 	}
 
 	switch (code->op) {
@@ -129,6 +166,10 @@ stage3:
 		loc = *(u32 *)dest;
 		ret = fetch_store_string_user(val + code->offset, dest, base);
 		break;
+	case FETCH_OP_ST_SYMSTR:
+		loc = *(u32 *)dest;
+		ret = fetch_store_symstring(val + code->offset, dest, base);
+		break;
 	default:
 		return -EILSEQ;
 	}
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index fb58e86dd117..8d64b6553aed 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -691,7 +691,8 @@ static int __trace_uprobe_create(int argc, const char **argv)
 	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
 		trace_probe_log_set_index(i + 2);
 		ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i],
-					is_return ? TPARG_FL_RETURN : 0);
+					(is_return ? TPARG_FL_RETURN : 0) |
+					TPARG_FL_USER);
 		if (ret)
 			goto error;
 	}
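
Usage notes (editor's sketches, not part of the patch):

The setup_trace_triggers() code added to trace_events.c parses a new 'trace_trigger=' kernel command-line parameter: entries are separated by commas, and each entry is an event name and a trigger separated by a dot. A minimal boot-line sketch (the event and filter here are illustrative, not taken from this diff):

    trace_trigger="sched_switch.stacktrace if prev_state == 2"

Because the parameter is handled before the tracefs directories exist, trace_early_triggers() attaches the trigger as soon as the event descriptor is created at early init; note the companion set_trigger_filter() change that skips tracepoint synchronization while system_state is SYSTEM_BOOTING.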
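The trace_events_hist.c changes add '.percent' and '.graph' value modifiers plus a 'nohitcount' (NOHC) attribute. A hedged tracefs sketch, assuming the usual mount point and the sched_stat_runtime event with its runtime field (chosen only for illustration):

    cd /sys/kernel/tracing
    echo 'hist:keys=pid:vals=runtime.percent,runtime.graph:sort=pid:NOHC' \
        > events/sched/sched_stat_runtime/trigger
    cat events/sched/sched_stat_runtime/hist

'.percent' prints a value as a share of the column total, '.graph' draws a 20-character '#' bar scaled to the column maximum (see __fill_bar_str()), and 'nohitcount' suppresses the raw hitcount column; per create_val_fields(), nohitcount without at least one other value is rejected with HIST_ERR_NEED_NOHC_VAL.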
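The new osnoise/options file is a bitmask behind a seq_file: reading lists every option, prefixed with NO_ when clear; writing an option name sets it, the NO_-prefixed name clears it, and DEFAULTS restores OSN_DEFAULT_OPTIONS (only OSNOISE_WORKLOAD set). A hedged transcript (output spacing approximate):

    # cat /sys/kernel/tracing/osnoise/options
    DEFAULTS OSNOISE_WORKLOAD NO_PANIC_ON_STOP NO_OSNOISE_PREEMPT_DISABLE NO_OSNOISE_IRQ_DISABLE
    # echo NO_OSNOISE_WORKLOAD > /sys/kernel/tracing/osnoise/options

With OSNOISE_WORKLOAD cleared, start_kthread() skips creating the per-CPU sampling threads, so the noise seen by an external workload can be measured instead; PANIC_ON_STOP turns the stop-tracing condition into a panic for post-mortem analysis.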
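Finally, the trace_probe changes add a 'symstr' fetch type that resolves a kernel address to its symbol+offset string at record time (via sprint_symbol()), instead of storing the raw address as 'symbol' does. A hedged kprobe_events sketch (the probed function and $stack0 argument are illustrative; TPARG_FL_USER makes find_fetch_type() reject symstr for uprobes):

    echo 'p:myprobe vfs_read $stack0:symstr' >> /sys/kernel/tracing/kprobe_events
    echo 1 > /sys/kernel/tracing/events/kprobes/myprobe/enable

Recording the string when the event fires keeps the output meaningful even if the symbol later disappears, for example when a module is unloaded before the trace is read.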