diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/events/core.c | 188 | ||||
| -rw-r--r-- | kernel/time/timer.c | 6 | ||||
| -rw-r--r-- | kernel/trace/trace_event_perf.c | 102 | ||||
| -rw-r--r-- | kernel/trace/trace_kprobe.c | 91 | ||||
| -rw-r--r-- | kernel/trace/trace_probe.h | 11 | ||||
| -rw-r--r-- | kernel/trace/trace_uprobe.c | 86 | 
6 files changed, 433 insertions, 51 deletions
| diff --git a/kernel/events/core.c b/kernel/events/core.c index 96db9ae5d5af..57898102847f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7996,9 +7996,119 @@ static struct pmu perf_tracepoint = {  	.read		= perf_swevent_read,  }; +#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) +/* + * Flags in config, used by dynamic PMU kprobe and uprobe + * The flags should match following PMU_FORMAT_ATTR(). + * + * PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe + *                               if not set, create kprobe/uprobe + */ +enum perf_probe_config { +	PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0,  /* [k,u]retprobe */ +}; + +PMU_FORMAT_ATTR(retprobe, "config:0"); + +static struct attribute *probe_attrs[] = { +	&format_attr_retprobe.attr, +	NULL, +}; + +static struct attribute_group probe_format_group = { +	.name = "format", +	.attrs = probe_attrs, +}; + +static const struct attribute_group *probe_attr_groups[] = { +	&probe_format_group, +	NULL, +}; +#endif + +#ifdef CONFIG_KPROBE_EVENTS +static int perf_kprobe_event_init(struct perf_event *event); +static struct pmu perf_kprobe = { +	.task_ctx_nr	= perf_sw_context, +	.event_init	= perf_kprobe_event_init, +	.add		= perf_trace_add, +	.del		= perf_trace_del, +	.start		= perf_swevent_start, +	.stop		= perf_swevent_stop, +	.read		= perf_swevent_read, +	.attr_groups	= probe_attr_groups, +}; + +static int perf_kprobe_event_init(struct perf_event *event) +{ +	int err; +	bool is_retprobe; + +	if (event->attr.type != perf_kprobe.type) +		return -ENOENT; +	/* +	 * no branch sampling for probe events +	 */ +	if (has_branch_stack(event)) +		return -EOPNOTSUPP; + +	is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE; +	err = perf_kprobe_init(event, is_retprobe); +	if (err) +		return err; + +	event->destroy = perf_kprobe_destroy; + +	return 0; +} +#endif /* CONFIG_KPROBE_EVENTS */ + +#ifdef CONFIG_UPROBE_EVENTS +static int perf_uprobe_event_init(struct perf_event *event); 
+static struct pmu perf_uprobe = { +	.task_ctx_nr	= perf_sw_context, +	.event_init	= perf_uprobe_event_init, +	.add		= perf_trace_add, +	.del		= perf_trace_del, +	.start		= perf_swevent_start, +	.stop		= perf_swevent_stop, +	.read		= perf_swevent_read, +	.attr_groups	= probe_attr_groups, +}; + +static int perf_uprobe_event_init(struct perf_event *event) +{ +	int err; +	bool is_retprobe; + +	if (event->attr.type != perf_uprobe.type) +		return -ENOENT; +	/* +	 * no branch sampling for probe events +	 */ +	if (has_branch_stack(event)) +		return -EOPNOTSUPP; + +	is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE; +	err = perf_uprobe_init(event, is_retprobe); +	if (err) +		return err; + +	event->destroy = perf_uprobe_destroy; + +	return 0; +} +#endif /* CONFIG_UPROBE_EVENTS */ +  static inline void perf_tp_register(void)  {  	perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT); +#ifdef CONFIG_KPROBE_EVENTS +	perf_pmu_register(&perf_kprobe, "kprobe", -1); +#endif +#ifdef CONFIG_UPROBE_EVENTS +	perf_pmu_register(&perf_uprobe, "uprobe", -1); +#endif  }  static void perf_event_free_filter(struct perf_event *event) @@ -8075,13 +8185,32 @@ static void perf_event_free_bpf_handler(struct perf_event *event)  }  #endif +/* + * returns true if the event is a tracepoint, or a kprobe/uprobe created + * with perf_event_open() + */ +static inline bool perf_event_is_tracing(struct perf_event *event) +{ +	if (event->pmu == &perf_tracepoint) +		return true; +#ifdef CONFIG_KPROBE_EVENTS +	if (event->pmu == &perf_kprobe) +		return true; +#endif +#ifdef CONFIG_UPROBE_EVENTS +	if (event->pmu == &perf_uprobe) +		return true; +#endif +	return false; +} +  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)  {  	bool is_kprobe, is_tracepoint, is_syscall_tp;  	struct bpf_prog *prog;  	int ret; -	if (event->attr.type != PERF_TYPE_TRACEPOINT) +	if (!perf_event_is_tracing(event))  		return perf_event_set_bpf_handler(event, prog_fd);  	
is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE; @@ -8127,7 +8256,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)  static void perf_event_free_bpf_prog(struct perf_event *event)  { -	if (event->attr.type != PERF_TYPE_TRACEPOINT) { +	if (!perf_event_is_tracing(event)) {  		perf_event_free_bpf_handler(event);  		return;  	} @@ -8546,47 +8675,36 @@ fail_clear_files:  	return ret;  } -static int -perf_tracepoint_set_filter(struct perf_event *event, char *filter_str) -{ -	struct perf_event_context *ctx = event->ctx; -	int ret; - -	/* -	 * Beware, here be dragons!! -	 * -	 * the tracepoint muck will deadlock against ctx->mutex, but the tracepoint -	 * stuff does not actually need it. So temporarily drop ctx->mutex. As per -	 * perf_event_ctx_lock() we already have a reference on ctx. -	 * -	 * This can result in event getting moved to a different ctx, but that -	 * does not affect the tracepoint state. -	 */ -	mutex_unlock(&ctx->mutex); -	ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); -	mutex_lock(&ctx->mutex); - -	return ret; -} -  static int perf_event_set_filter(struct perf_event *event, void __user *arg)  { -	char *filter_str;  	int ret = -EINVAL; - -	if ((event->attr.type != PERF_TYPE_TRACEPOINT || -	    !IS_ENABLED(CONFIG_EVENT_TRACING)) && -	    !has_addr_filter(event)) -		return -EINVAL; +	char *filter_str;  	filter_str = strndup_user(arg, PAGE_SIZE);  	if (IS_ERR(filter_str))  		return PTR_ERR(filter_str); -	if (IS_ENABLED(CONFIG_EVENT_TRACING) && -	    event->attr.type == PERF_TYPE_TRACEPOINT) -		ret = perf_tracepoint_set_filter(event, filter_str); -	else if (has_addr_filter(event)) +#ifdef CONFIG_EVENT_TRACING +	if (perf_event_is_tracing(event)) { +		struct perf_event_context *ctx = event->ctx; + +		/* +		 * Beware, here be dragons!! +		 * +		 * the tracepoint muck will deadlock against ctx->mutex, but +		 * the tracepoint stuff does not actually need it. So +		 * temporarily drop ctx->mutex. 
As per perf_event_ctx_lock() we +		 * already have a reference on ctx. +		 * +		 * This can result in event getting moved to a different ctx, +		 * but that does not affect the tracepoint state. +		 */ +		mutex_unlock(&ctx->mutex); +		ret = ftrace_profile_set_filter(event, event->attr.config, filter_str); +		mutex_lock(&ctx->mutex); +	} else +#endif +	if (has_addr_filter(event))  		ret = perf_event_set_addr_filter(event, filter_str);  	kfree(filter_str); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 48150ab42de9..4a4fd567fb26 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1894,6 +1894,12 @@ int timers_dead_cpu(unsigned int cpu)  		raw_spin_lock_irq(&new_base->lock);  		raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); +		/* +		 * The current CPUs base clock might be stale. Update it +		 * before moving the timers over. +		 */ +		forward_timer_base(new_base); +  		BUG_ON(old_base->running_timer);  		for (i = 0; i < WHEEL_SIZE; i++) diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 55d6dff37daf..2c416509b834 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -8,6 +8,7 @@  #include <linux/module.h>  #include <linux/kprobes.h>  #include "trace.h" +#include "trace_probe.h"  static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; @@ -237,6 +238,107 @@ void perf_trace_destroy(struct perf_event *p_event)  	mutex_unlock(&event_mutex);  } +#ifdef CONFIG_KPROBE_EVENTS +int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe) +{ +	int ret; +	char *func = NULL; +	struct trace_event_call *tp_event; + +	if (p_event->attr.kprobe_func) { +		func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL); +		if (!func) +			return -ENOMEM; +		ret = strncpy_from_user( +			func, u64_to_user_ptr(p_event->attr.kprobe_func), +			KSYM_NAME_LEN); +		if (ret < 0) +			goto out; + +		if (func[0] == '\0') { +			kfree(func); +			func = NULL; +		} +	} + +	tp_event = create_local_trace_kprobe( +		
func, (void *)(unsigned long)(p_event->attr.kprobe_addr), +		p_event->attr.probe_offset, is_retprobe); +	if (IS_ERR(tp_event)) { +		ret = PTR_ERR(tp_event); +		goto out; +	} + +	ret = perf_trace_event_init(tp_event, p_event); +	if (ret) +		destroy_local_trace_kprobe(tp_event); +out: +	kfree(func); +	return ret; +} + +void perf_kprobe_destroy(struct perf_event *p_event) +{ +	perf_trace_event_close(p_event); +	perf_trace_event_unreg(p_event); + +	destroy_local_trace_kprobe(p_event->tp_event); +} +#endif /* CONFIG_KPROBE_EVENTS */ + +#ifdef CONFIG_UPROBE_EVENTS +int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe) +{ +	int ret; +	char *path = NULL; +	struct trace_event_call *tp_event; + +	if (!p_event->attr.uprobe_path) +		return -EINVAL; +	path = kzalloc(PATH_MAX, GFP_KERNEL); +	if (!path) +		return -ENOMEM; +	ret = strncpy_from_user( +		path, u64_to_user_ptr(p_event->attr.uprobe_path), PATH_MAX); +	if (ret < 0) +		goto out; +	if (path[0] == '\0') { +		ret = -EINVAL; +		goto out; +	} + +	tp_event = create_local_trace_uprobe( +		path, p_event->attr.probe_offset, is_retprobe); +	if (IS_ERR(tp_event)) { +		ret = PTR_ERR(tp_event); +		goto out; +	} + +	/* +	 * local trace_uprobe need to hold event_mutex to call +	 * uprobe_buffer_enable() and uprobe_buffer_disable(). +	 * event_mutex is not required for local trace_kprobes. 
+	 */ +	mutex_lock(&event_mutex); +	ret = perf_trace_event_init(tp_event, p_event); +	if (ret) +		destroy_local_trace_uprobe(tp_event); +	mutex_unlock(&event_mutex); +out: +	kfree(path); +	return ret; +} + +void perf_uprobe_destroy(struct perf_event *p_event) +{ +	mutex_lock(&event_mutex); +	perf_trace_event_close(p_event); +	perf_trace_event_unreg(p_event); +	mutex_unlock(&event_mutex); +	destroy_local_trace_uprobe(p_event->tp_event); +} +#endif /* CONFIG_UPROBE_EVENTS */ +  int perf_trace_add(struct perf_event *p_event, int flags)  {  	struct trace_event_call *tp_event = p_event->tp_event; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 1fad24acd444..5ce9b8cf7be3 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -462,6 +462,14 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)  			disable_kprobe(&tk->rp.kp);  		wait = 1;  	} + +	/* +	 * if tk is not added to any list, it must be a local trace_kprobe +	 * created with perf_event_open. 
We don't need to wait for these +	 * trace_kprobes +	 */ +	if (list_empty(&tk->list)) +		wait = 0;   out:  	if (wait) {  		/* @@ -1358,12 +1366,9 @@ static struct trace_event_functions kprobe_funcs = {  	.trace		= print_kprobe_event  }; -static int register_kprobe_event(struct trace_kprobe *tk) +static inline void init_trace_event_call(struct trace_kprobe *tk, +					 struct trace_event_call *call)  { -	struct trace_event_call *call = &tk->tp.call; -	int ret; - -	/* Initialize trace_event_call */  	INIT_LIST_HEAD(&call->class->fields);  	if (trace_kprobe_is_return(tk)) {  		call->event.funcs = &kretprobe_funcs; @@ -1372,6 +1377,19 @@ static int register_kprobe_event(struct trace_kprobe *tk)  		call->event.funcs = &kprobe_funcs;  		call->class->define_fields = kprobe_event_define_fields;  	} + +	call->flags = TRACE_EVENT_FL_KPROBE; +	call->class->reg = kprobe_register; +	call->data = tk; +} + +static int register_kprobe_event(struct trace_kprobe *tk) +{ +	struct trace_event_call *call = &tk->tp.call; +	int ret = 0; + +	init_trace_event_call(tk, call); +  	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)  		return -ENOMEM;  	ret = register_trace_event(&call->event); @@ -1379,9 +1397,6 @@ static int register_kprobe_event(struct trace_kprobe *tk)  		kfree(call->print_fmt);  		return -ENODEV;  	} -	call->flags = TRACE_EVENT_FL_KPROBE; -	call->class->reg = kprobe_register; -	call->data = tk;  	ret = trace_add_event_call(call);  	if (ret) {  		pr_info("Failed to register kprobe event: %s\n", @@ -1403,6 +1418,66 @@ static int unregister_kprobe_event(struct trace_kprobe *tk)  	return ret;  } +#ifdef CONFIG_PERF_EVENTS +/* create a trace_kprobe, but don't add it to global lists */ +struct trace_event_call * +create_local_trace_kprobe(char *func, void *addr, unsigned long offs, +			  bool is_return) +{ +	struct trace_kprobe *tk; +	int ret; +	char *event; + +	/* +	 * local trace_kprobes are not added to probe_list, so they are never +	 * searched in 
find_trace_kprobe(). Therefore, there is no concern of +	 * duplicated name here. +	 */ +	event = func ? func : "DUMMY_EVENT"; + +	tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func, +				offs, 0 /* maxactive */, 0 /* nargs */, +				is_return); + +	if (IS_ERR(tk)) { +		pr_info("Failed to allocate trace_probe.(%d)\n", +			(int)PTR_ERR(tk)); +		return ERR_CAST(tk); +	} + +	init_trace_event_call(tk, &tk->tp.call); + +	if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) { +		ret = -ENOMEM; +		goto error; +	} + +	ret = __register_trace_kprobe(tk); +	if (ret < 0) +		goto error; + +	return &tk->tp.call; +error: +	free_trace_kprobe(tk); +	return ERR_PTR(ret); +} + +void destroy_local_trace_kprobe(struct trace_event_call *event_call) +{ +	struct trace_kprobe *tk; + +	tk = container_of(event_call, struct trace_kprobe, tp.call); + +	if (trace_probe_is_enabled(&tk->tp)) { +		WARN_ON(1); +		return; +	} + +	__unregister_trace_kprobe(tk); +	free_trace_kprobe(tk); +} +#endif /* CONFIG_PERF_EVENTS */ +  /* Make a tracefs interface for controlling probe points */  static __init int init_kprobe_trace(void)  { diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index e101c5bb9eda..0745f895f780 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -416,3 +416,14 @@ store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,  }  extern int set_print_fmt(struct trace_probe *tp, bool is_return); + +#ifdef CONFIG_PERF_EVENTS +extern struct trace_event_call * +create_local_trace_kprobe(char *func, void *addr, unsigned long offs, +			  bool is_return); +extern void destroy_local_trace_kprobe(struct trace_event_call *event_call); + +extern struct trace_event_call * +create_local_trace_uprobe(char *name, unsigned long offs, bool is_return); +extern void destroy_local_trace_uprobe(struct trace_event_call *event_call); +#endif diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 
268029ae1be6..2014f4351ae0 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1292,16 +1292,25 @@ static struct trace_event_functions uprobe_funcs = {  	.trace		= print_uprobe_event  }; -static int register_uprobe_event(struct trace_uprobe *tu) +static inline void init_trace_event_call(struct trace_uprobe *tu, +					 struct trace_event_call *call)  { -	struct trace_event_call *call = &tu->tp.call; -	int ret; - -	/* Initialize trace_event_call */  	INIT_LIST_HEAD(&call->class->fields);  	call->event.funcs = &uprobe_funcs;  	call->class->define_fields = uprobe_event_define_fields; +	call->flags = TRACE_EVENT_FL_UPROBE; +	call->class->reg = trace_uprobe_register; +	call->data = tu; +} + +static int register_uprobe_event(struct trace_uprobe *tu) +{ +	struct trace_event_call *call = &tu->tp.call; +	int ret = 0; + +	init_trace_event_call(tu, call); +  	if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)  		return -ENOMEM; @@ -1311,9 +1320,6 @@ static int register_uprobe_event(struct trace_uprobe *tu)  		return -ENODEV;  	} -	call->flags = TRACE_EVENT_FL_UPROBE; -	call->class->reg = trace_uprobe_register; -	call->data = tu;  	ret = trace_add_event_call(call);  	if (ret) { @@ -1339,6 +1345,70 @@ static int unregister_uprobe_event(struct trace_uprobe *tu)  	return 0;  } +#ifdef CONFIG_PERF_EVENTS +struct trace_event_call * +create_local_trace_uprobe(char *name, unsigned long offs, bool is_return) +{ +	struct trace_uprobe *tu; +	struct inode *inode; +	struct path path; +	int ret; + +	ret = kern_path(name, LOOKUP_FOLLOW, &path); +	if (ret) +		return ERR_PTR(ret); + +	inode = igrab(d_inode(path.dentry)); +	path_put(&path); + +	if (!inode || !S_ISREG(inode->i_mode)) { +		iput(inode); +		return ERR_PTR(-EINVAL); +	} + +	/* +	 * local trace_uprobes are not added to probe_list, so they are never +	 * searched in find_trace_uprobe(). Therefore, there is no concern of +	 * duplicated name "DUMMY_EVENT" here. 
+	 */ +	tu = alloc_trace_uprobe(UPROBE_EVENT_SYSTEM, "DUMMY_EVENT", 0, +				is_return); + +	if (IS_ERR(tu)) { +		pr_info("Failed to allocate trace_uprobe.(%d)\n", +			(int)PTR_ERR(tu)); +		return ERR_CAST(tu); +	} + +	tu->offset = offs; +	tu->inode = inode; +	tu->filename = kstrdup(name, GFP_KERNEL); +	init_trace_event_call(tu, &tu->tp.call); + +	if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) { +		ret = -ENOMEM; +		goto error; +	} + +	return &tu->tp.call; +error: +	free_trace_uprobe(tu); +	return ERR_PTR(ret); +} + +void destroy_local_trace_uprobe(struct trace_event_call *event_call) +{ +	struct trace_uprobe *tu; + +	tu = container_of(event_call, struct trace_uprobe, tp.call); + +	kfree(tu->tp.call.print_fmt); +	tu->tp.call.print_fmt = NULL; + +	free_trace_uprobe(tu); +} +#endif /* CONFIG_PERF_EVENTS */ +  /* Make a trace interface for controling probe points */  static __init int init_uprobe_trace(void)  { |