aboutsummaryrefslogtreecommitdiff
path: root/include/linux/perf_event.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-09-18 15:03:58 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2024-09-18 15:03:58 +0200
commit9f0c253ddddca608457a42e509267bed2dee0a50 (patch)
tree6e00e1ed61e997c06169c70c9365f213fe412fc8 /include/linux/perf_event.h
parent941c122da5c8355335dc16011c1c291a32cd1118 (diff)
parent5e645f31139183ac9a282238da18ca6bbc1c6f4a (diff)
Merge tag 'perf-core-2024-09-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events updates from Ingo Molnar: - Implement per-PMU context rescheduling to significantly improve single-PMU performance, and related cleanups/fixes (Peter Zijlstra and Namhyung Kim) - Fix ancient bug resulting in a lot of events being dropped erroneously at higher sampling frequencies (Luo Gengkun) - uprobes enhancements: - Implement RCU-protected hot path optimizations for better performance: "For baseline vs SRCU, peak througput increased from 3.7 M/s (million uprobe triggerings per second) up to about 8 M/s. For uretprobes it's a bit more modest with bump from 2.4 M/s to 5 M/s. For SRCU vs RCU Tasks Trace, peak throughput for uprobes increases further from 8 M/s to 10.3 M/s (+28%!), and for uretprobes from 5.3 M/s to 5.8 M/s (+11%), as we have more work to do on uretprobes side. Even single-thread (no contention) performance is slightly better: 3.276 M/s to 3.396 M/s (+3.5%) for uprobes, and 2.055 M/s to 2.174 M/s (+5.8%) for uretprobes." (Andrii Nakryiko et al) - Document mmap_lock, don't abuse get_user_pages_remote() (Oleg Nesterov) - Cleanups & fixes to prepare for future work: - Remove uprobe_register_refctr() - Simplify error handling for alloc_uprobe() - Make uprobe_register() return struct uprobe * - Fold __uprobe_unregister() into uprobe_unregister() - Shift put_uprobe() from delete_uprobe() to uprobe_unregister() - BPF: Fix use-after-free in bpf_uprobe_multi_link_attach() (Oleg Nesterov) - New feature & ABI extension: allow events to use PERF_SAMPLE READ with inheritance, enabling sample based profiling of a group of counters over a hierarchy of processes or threads (Ben Gainey) - Intel uncore & power events updates: - Add Arrow Lake and Lunar Lake support - Add PERF_EV_CAP_READ_SCOPE - Clean up and enhance cpumask and hotplug support (Kan Liang) - Add LNL uncore iMC freerunning support - Use D0:F0 as a default device (Zhenyu Wang) - Intel PT: fix AUX snapshot handling race (Adrian Hunter) - Misc fixes and cleanups (James Clark, Jiri Olsa, Oleg Nesterov and Peter Zijlstra) * tag 'perf-core-2024-09-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (40 commits) dmaengine: idxd: Clean up cpumask and hotplug for perfmon iommu/vt-d: Clean up cpumask and hotplug for perfmon perf/x86/intel/cstate: Clean up cpumask and hotplug perf: Add PERF_EV_CAP_READ_SCOPE perf: Generic hotplug support for a PMU with a scope uprobes: perform lockless SRCU-protected uprobes_tree lookup rbtree: provide rb_find_rcu() / rb_find_add_rcu() perf/uprobe: split uprobe_unregister() uprobes: travers uprobe's consumer list locklessly under SRCU protection uprobes: get rid of enum uprobe_filter_ctx in uprobe filter callbacks uprobes: protected uprobe lifetime with SRCU uprobes: revamp uprobe refcounting and lifetime management bpf: Fix use-after-free in bpf_uprobe_multi_link_attach() perf/core: Fix small negative period being ignored perf: Really fix event_function_call() locking perf: Optimize __pmu_ctx_sched_out() perf: Add context time freeze perf: Fix event_function_call() locking perf: Extract a few helpers perf: Optimize context reschedule for single PMU cases ...
Diffstat (limited to 'include/linux/perf_event.h')
-rw-r--r--include/linux/perf_event.h32
1 files changed, 30 insertions, 2 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index e336306b8c08..fb908843f209 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -168,6 +168,9 @@ struct hw_perf_event {
struct hw_perf_event_extra extra_reg;
struct hw_perf_event_extra branch_reg;
};
+ struct { /* aux / Intel-PT */
+ u64 aux_config;
+ };
struct { /* software */
struct hrtimer hrtimer;
};
@@ -292,6 +295,19 @@ struct perf_event_pmu_context;
#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
+/**
+ * pmu::scope
+ */
+enum perf_pmu_scope {
+ PERF_PMU_SCOPE_NONE = 0,
+ PERF_PMU_SCOPE_CORE,
+ PERF_PMU_SCOPE_DIE,
+ PERF_PMU_SCOPE_CLUSTER,
+ PERF_PMU_SCOPE_PKG,
+ PERF_PMU_SCOPE_SYS_WIDE,
+ PERF_PMU_MAX_SCOPE,
+};
+
struct perf_output_handle;
#define PMU_NULL_DEV ((void *)(~0UL))
@@ -315,6 +331,11 @@ struct pmu {
*/
int capabilities;
+ /*
+ * PMU scope
+ */
+ unsigned int scope;
+
int __percpu *pmu_disable_count;
struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
atomic_t exclusive_cnt; /* < 0: cpu; > 0: tsk */
@@ -615,10 +636,13 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
* PERF_EV_CAP_SIBLING: An event with this flag must be a group sibling and
* cannot be a group leader. If an event with this flag is detached from the
* group it is scheduled out and moved into an unrecoverable ERROR state.
+ * PERF_EV_CAP_READ_SCOPE: A CPU event that can be read from any CPU of the
+ * PMU scope where it is active.
*/
#define PERF_EV_CAP_SOFTWARE BIT(0)
#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
#define PERF_EV_CAP_SIBLING BIT(2)
+#define PERF_EV_CAP_READ_SCOPE BIT(3)
#define SWEVENT_HLIST_BITS 8
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
@@ -963,12 +987,16 @@ struct perf_event_context {
struct rcu_head rcu_head;
/*
- * Sum (event->pending_work + event->pending_work)
+ * The count of events for which using the switch-out fast path
+ * should be avoided.
+ *
+ * Sum (event->pending_work + events with
+ * (attr->inherit && (attr->sample_type & PERF_SAMPLE_READ)))
*
* The SIGTRAP is targeted at ctx->task, as such it won't do changing
* that until the signal is delivered.
*/
- local_t nr_pending;
+ local_t nr_no_switch_fast;
};
struct perf_cpu_pmu_context {