diff options
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r-- | kernel/events/core.c | 200 |
1 files changed, 172 insertions, 28 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index d3dae3419b99..ae16867670a9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -163,6 +163,7 @@ static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; +static atomic_t nr_switch_events __read_mostly; static LIST_HEAD(pmus); static DEFINE_MUTEX(pmus_lock); @@ -1868,8 +1869,6 @@ event_sched_in(struct perf_event *event, perf_pmu_disable(event->pmu); - event->tstamp_running += tstamp - event->tstamp_stopped; - perf_set_shadow_time(event, ctx, tstamp); perf_log_itrace_start(event); @@ -1881,6 +1880,8 @@ event_sched_in(struct perf_event *event, goto out; } + event->tstamp_running += tstamp - event->tstamp_stopped; + if (!is_software_event(event)) cpuctx->active_oncpu++; if (!ctx->nr_active++) @@ -2619,6 +2620,9 @@ static void perf_pmu_sched_task(struct task_struct *prev, local_irq_restore(flags); } +static void perf_event_switch(struct task_struct *task, + struct task_struct *next_prev, bool sched_in); + #define for_each_task_context_nr(ctxn) \ for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) @@ -2641,6 +2645,9 @@ void __perf_event_task_sched_out(struct task_struct *task, if (__this_cpu_read(perf_sched_cb_usages)) perf_pmu_sched_task(task, next, false); + if (atomic_read(&nr_switch_events)) + perf_event_switch(task, next, false); + for_each_task_context_nr(ctxn) perf_event_context_sched_out(task, ctxn, next); @@ -2831,6 +2838,9 @@ void __perf_event_task_sched_in(struct task_struct *prev, if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) perf_cgroup_sched_in(prev, task); + if (atomic_read(&nr_switch_events)) + perf_event_switch(task, prev, true); + if (__this_cpu_read(perf_sched_cb_usages)) perf_pmu_sched_task(prev, task, true); } @@ -3454,6 +3464,10 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_task_events); if (event->attr.freq) atomic_dec(&nr_freq_events); + if (event->attr.context_switch) { + static_key_slow_dec_deferred(&perf_sched_events); + atomic_dec(&nr_switch_events); + } if (is_cgroup_event(event)) static_key_slow_dec_deferred(&perf_sched_events); if (has_branch_stack(event)) @@ -3958,28 +3972,21 @@ static void perf_event_for_each(struct perf_event *event, perf_event_for_each_child(sibling, func); } -static int perf_event_period(struct perf_event *event, u64 __user *arg) -{ - struct perf_event_context *ctx = event->ctx; - int ret = 0, active; +struct period_event { + struct perf_event *event; u64 value; +}; - if (!is_sampling_event(event)) - return -EINVAL; - - if (copy_from_user(&value, arg, sizeof(value))) - return -EFAULT; - - if (!value) - return -EINVAL; +static int __perf_event_period(void *info) +{ + struct period_event *pe = info; + struct perf_event *event = pe->event; + struct perf_event_context *ctx = event->ctx; + u64 value = pe->value; + bool active; - raw_spin_lock_irq(&ctx->lock); + raw_spin_lock(&ctx->lock); if (event->attr.freq) { - if (value > sysctl_perf_event_sample_rate) { - ret = -EINVAL; - goto unlock; - } - event->attr.sample_freq = value; } else { event->attr.sample_period = value; @@ -3998,11 +4005,53 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) event->pmu->start(event, PERF_EF_RELOAD); perf_pmu_enable(ctx->pmu); } + raw_spin_unlock(&ctx->lock); + + return 0; +} + +static int perf_event_period(struct perf_event *event, u64 __user *arg) +{ + struct period_event pe = { .event = event, }; + struct perf_event_context *ctx = event->ctx; + struct task_struct *task; + u64 value; + + if (!is_sampling_event(event)) + return -EINVAL; + + if (copy_from_user(&value, arg, sizeof(value))) + return -EFAULT; + + if (!value) + return -EINVAL; + + if (event->attr.freq && value > sysctl_perf_event_sample_rate) + return -EINVAL; + + task = ctx->task; + pe.value = value; + + if (!task) { + cpu_function_call(event->cpu, __perf_event_period, &pe); + return 0; + } -unlock: +retry: + if (!task_function_call(task, __perf_event_period, &pe)) + return 0; + + raw_spin_lock_irq(&ctx->lock); + if (ctx->is_active) { + raw_spin_unlock_irq(&ctx->lock); + task = ctx->task; + goto retry; + } + + __perf_event_period(&pe); raw_spin_unlock_irq(&ctx->lock); - return ret; + return 0; } static const struct file_operations perf_fops; @@ -4740,12 +4789,20 @@ static const struct file_operations perf_fops = { * to user-space before waking everybody up. */ +static inline struct fasync_struct **perf_event_fasync(struct perf_event *event) +{ + /* only the parent has fasync state */ + if (event->parent) + event = event->parent; + return &event->fasync; +} + void perf_event_wakeup(struct perf_event *event) { ring_buffer_wakeup(event); if (event->pending_kill) { - kill_fasync(&event->fasync, SIGIO, event->pending_kill); + kill_fasync(perf_event_fasync(event), SIGIO, event->pending_kill); event->pending_kill = 0; } } @@ -5982,6 +6039,91 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost) } /* + * context_switch tracking + */ + +struct perf_switch_event { + struct task_struct *task; + struct task_struct *next_prev; + + struct { + struct perf_event_header header; + u32 next_prev_pid; + u32 next_prev_tid; + } event_id; +}; + +static int perf_event_switch_match(struct perf_event *event) +{ + return event->attr.context_switch; +} + +static void perf_event_switch_output(struct perf_event *event, void *data) +{ + struct perf_switch_event *se = data; + struct perf_output_handle handle; + struct perf_sample_data sample; + int ret; + + if (!perf_event_switch_match(event)) + return; + + /* Only CPU-wide events are allowed to see next/prev pid/tid */ + if (event->ctx->task) { + se->event_id.header.type = PERF_RECORD_SWITCH; + se->event_id.header.size = sizeof(se->event_id.header); + } else { + se->event_id.header.type = PERF_RECORD_SWITCH_CPU_WIDE; + se->event_id.header.size = sizeof(se->event_id); + se->event_id.next_prev_pid = + perf_event_pid(event, se->next_prev); + se->event_id.next_prev_tid = + perf_event_tid(event, se->next_prev); + } + + perf_event_header__init_id(&se->event_id.header, &sample, event); + + ret = perf_output_begin(&handle, event, se->event_id.header.size); + if (ret) + return; + + if (event->ctx->task) + perf_output_put(&handle, se->event_id.header); + else + perf_output_put(&handle, se->event_id); + + perf_event__output_id_sample(event, &handle, &sample); + + perf_output_end(&handle); +} + +static void perf_event_switch(struct task_struct *task, + struct task_struct *next_prev, bool sched_in) +{ + struct perf_switch_event switch_event; + + /* N.B. caller checks nr_switch_events != 0 */ + + switch_event = (struct perf_switch_event){ + .task = task, + .next_prev = next_prev, + .event_id = { + .header = { + /* .type */ + .misc = sched_in ? 0 : PERF_RECORD_MISC_SWITCH_OUT, + /* .size */ + }, + /* .next_prev_pid */ + /* .next_prev_tid */ + }, + }; + + perf_event_aux(perf_event_switch_output, + &switch_event, + NULL); +} + +/* * IRQ throttle logging */ @@ -6040,8 +6182,6 @@ static void perf_log_itrace_start(struct perf_event *event) event->hw.itrace_started) return; - event->hw.itrace_started = 1; - rec.header.type = PERF_RECORD_ITRACE_START; rec.header.misc = 0; rec.header.size = sizeof(rec); @@ -6124,7 +6264,7 @@ static int __perf_event_overflow(struct perf_event *event, else perf_event_output(event, data, regs); - if (event->fasync && event->pending_kill) { + if (*perf_event_fasync(event) && event->pending_kill) { event->pending_wakeup = 1; irq_work_queue(&event->pending); } @@ -6749,8 +6889,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) if (event->tp_event->prog) return -EEXIST; - if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) - /* bpf programs can only be attached to kprobes */ + if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE)) + /* bpf programs can only be attached to u/kprobes */ return -EINVAL; prog = bpf_prog_get(prog_fd); @@ -7479,6 +7619,10 @@ static void account_event(struct perf_event *event) if (atomic_inc_return(&nr_freq_events) == 1) tick_nohz_full_kick_all(); } + if (event->attr.context_switch) { + atomic_inc(&nr_switch_events); + static_key_slow_inc(&perf_sched_events.key); + } if (has_branch_stack(event)) static_key_slow_inc(&perf_sched_events.key); if (is_cgroup_event(event)) |