diff options
Diffstat (limited to 'tools/perf/Documentation')
-rw-r--r-- | tools/perf/Documentation/itrace.txt | 1 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-annotate.txt | 7 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-config.txt | 6 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-dlfilter.txt | 251 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-inject.txt | 10 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-intel-pt.txt | 125 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-probe.txt | 19 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-script-python.txt | 46 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-script.txt | 22 | ||||
-rw-r--r-- | tools/perf/Documentation/perf-top.txt | 12 | ||||
-rw-r--r-- | tools/perf/Documentation/perf.data-file-format.txt | 33 |
11 files changed, 514 insertions, 18 deletions
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 0f1005209a2b..2d586fe5e4c5 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -20,6 +20,7 @@ L synthesize last branch entries on existing event records s skip initial number of events q quicker (less detailed) decoding + Z prefer to ignore timestamps (so-called "timeless" decoding) The default is all events i.e. the same as --itrace=ibxwpe, except for perf script where it is --itrace=ce diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 80c1be5d566c..33c2521cba4a 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -58,6 +58,13 @@ OPTIONS --ignore-vmlinux:: Ignore vmlinux files. +--itrace:: + Options for decoding instruction tracing data. The options are: + +include::itrace.txt[] + + To disable decoding entirely, use --no-itrace. + -m:: --modules:: Load module symbols. WARNING: use only with -k and LIVE kernel. diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index b0872c801866..3bb75c1f25e8 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -706,6 +706,12 @@ intel-pt.*:: If set, Intel PT decoder will set the mispred flag on all branches. + intel-pt.max-loops:: + If set and non-zero, the maximum number of unconditional + branches decoded without consuming any trace packets. If + the maximum is exceeded there will be a "Never-ending loop" + error. The default is 100000. + auxtrace.*:: auxtrace.dumpdir:: diff --git a/tools/perf/Documentation/perf-dlfilter.txt b/tools/perf/Documentation/perf-dlfilter.txt new file mode 100644 index 000000000000..02842cb4cf90 --- /dev/null +++ b/tools/perf/Documentation/perf-dlfilter.txt @@ -0,0 +1,251 @@ +perf-dlfilter(1) +================ + +NAME +---- +perf-dlfilter - Filter sample events using a dynamically loaded shared +object file + +SYNOPSIS +-------- +[verse] +'perf script' [--dlfilter file.so ] [ --dlarg arg ]... + +DESCRIPTION +----------- + +This option is used to process data through a custom filter provided by a +dynamically loaded shared object file. Arguments can be passed using --dlarg +and retrieved using perf_dlfilter_fns.args(). + +If 'file.so' does not contain "/", then it will be found either in the current +directory, or perf tools exec path which is ~/libexec/perf-core/dlfilters for +a local build and install (refer perf --exec-path), or the dynamic linker +paths. + +API +--- + +The API for filtering consists of the following: + +[source,c] +---- +#include <perf/perf_dlfilter.h> + +const struct perf_dlfilter_fns perf_dlfilter_fns; + +int start(void **data, void *ctx); +int stop(void *data, void *ctx); +int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx); +int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx); +const char *filter_description(const char **long_description); +---- + +If implemented, 'start' will be called at the beginning, before any +calls to 'filter_event' or 'filter_event_early'. Return 0 to indicate success, +or return a negative error code. '*data' can be assigned for use by other +functions. 'ctx' is needed for calls to perf_dlfilter_fns, but most +perf_dlfilter_fns are not valid when called from 'start'. + +If implemented, 'stop' will be called at the end, after any calls to +'filter_event' or 'filter_event_early'. Return 0 to indicate success, or +return a negative error code. 'data' is set by 'start'. 'ctx' is needed +for calls to perf_dlfilter_fns, but most perf_dlfilter_fns are not valid +when called from 'stop'. + +If implemented, 'filter_event' will be called for each sample event. +Return 0 to keep the sample event, 1 to filter it out, or return a negative +error code. 'data' is set by 'start'. 'ctx' is needed for calls to +'perf_dlfilter_fns'. + +'filter_event_early' is the same as 'filter_event' except it is called before +internal filtering. + +If implemented, 'filter_description' should return a one-line description +of the filter, and optionally a longer description. + +The perf_dlfilter_sample structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +'filter_event' and 'filter_event_early' are passed a perf_dlfilter_sample +structure, which contains the following fields: +[source,c] +---- +/* + * perf sample event information (as per perf script and <linux/perf_event.h>) + */ +struct perf_dlfilter_sample { + __u32 size; /* Size of this structure (for compatibility checking) */ + __u16 ins_lat; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */ + __u16 p_stage_cyc; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */ + __u64 ip; + __s32 pid; + __s32 tid; + __u64 time; + __u64 addr; + __u64 id; + __u64 stream_id; + __u64 period; + __u64 weight; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */ + __u64 transaction; /* Refer PERF_SAMPLE_TRANSACTION in <linux/perf_event.h> */ + __u64 insn_cnt; /* For instructions-per-cycle (IPC) */ + __u64 cyc_cnt; /* For instructions-per-cycle (IPC) */ + __s32 cpu; + __u32 flags; /* Refer PERF_DLFILTER_FLAG_* above */ + __u64 data_src; /* Refer PERF_SAMPLE_DATA_SRC in <linux/perf_event.h> */ + __u64 phys_addr; /* Refer PERF_SAMPLE_PHYS_ADDR in <linux/perf_event.h> */ + __u64 data_page_size; /* Refer PERF_SAMPLE_DATA_PAGE_SIZE in <linux/perf_event.h> */ + __u64 code_page_size; /* Refer PERF_SAMPLE_CODE_PAGE_SIZE in <linux/perf_event.h> */ + __u64 cgroup; /* Refer PERF_SAMPLE_CGROUP in <linux/perf_event.h> */ + __u8 cpumode; /* Refer CPUMODE_MASK etc in <linux/perf_event.h> */ + __u8 addr_correlates_sym; /* True => resolve_addr() can be called */ + __u16 misc; /* Refer perf_event_header in <linux/perf_event.h> */ + __u32 raw_size; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */ + const void *raw_data; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */ + __u64 brstack_nr; /* Number of brstack entries */ + const struct perf_branch_entry *brstack; /* Refer <linux/perf_event.h> */ + __u64 raw_callchain_nr; /* Number of raw_callchain entries */ + const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */ + const char *event; +}; +---- + +The perf_dlfilter_fns structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The 'perf_dlfilter_fns' structure is populated with function pointers when the +file is loaded. The functions can be called by 'filter_event' or +'filter_event_early'. + +[source,c] +---- +struct perf_dlfilter_fns { + const struct perf_dlfilter_al *(*resolve_ip)(void *ctx); + const struct perf_dlfilter_al *(*resolve_addr)(void *ctx); + char **(*args)(void *ctx, int *dlargc); + __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al); + const __u8 *(*insn)(void *ctx, __u32 *length); + const char *(*srcline)(void *ctx, __u32 *line_number); + struct perf_event_attr *(*attr)(void *ctx); + __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len); + void *(*reserved[120])(void *); +}; +---- + +'resolve_ip' returns information about ip. + +'resolve_addr' returns information about addr (if addr_correlates_sym). + +'args' returns arguments from --dlarg options. + +'resolve_address' provides information about 'address'. al->size must be set +before calling. Returns 0 on success, -1 otherwise. + +'insn' returns instruction bytes and length. + +'srcline' return source file name and line number. + +'attr' returns perf_event_attr, refer <linux/perf_event.h>. + +'object_code' reads object code and returns the number of bytes read. + +The perf_dlfilter_al structure +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The 'perf_dlfilter_al' structure contains information about an address. + +[source,c] +---- +/* + * Address location (as per perf script) + */ +struct perf_dlfilter_al { + __u32 size; /* Size of this structure (for compatibility checking) */ + __u32 symoff; + const char *sym; + __u64 addr; /* Mapped address (from dso) */ + __u64 sym_start; + __u64 sym_end; + const char *dso; + __u8 sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer <elf.h> */ + __u8 is_64_bit; /* Only valid if dso is not NULL */ + __u8 is_kernel_ip; /* True if in kernel space */ + __u32 buildid_size; + __u8 *buildid; + /* Below members are only populated by resolve_ip() */ + __u8 filtered; /* true if this sample event will be filtered out */ + const char *comm; +}; +---- + +perf_dlfilter_sample flags +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The 'flags' member of 'perf_dlfilter_sample' corresponds with the flags field +of perf script. The bits of the flags are as follows: + +[source,c] +---- +/* Definitions for perf_dlfilter_sample flags */ +enum { + PERF_DLFILTER_FLAG_BRANCH = 1ULL << 0, + PERF_DLFILTER_FLAG_CALL = 1ULL << 1, + PERF_DLFILTER_FLAG_RETURN = 1ULL << 2, + PERF_DLFILTER_FLAG_CONDITIONAL = 1ULL << 3, + PERF_DLFILTER_FLAG_SYSCALLRET = 1ULL << 4, + PERF_DLFILTER_FLAG_ASYNC = 1ULL << 5, + PERF_DLFILTER_FLAG_INTERRUPT = 1ULL << 6, + PERF_DLFILTER_FLAG_TX_ABORT = 1ULL << 7, + PERF_DLFILTER_FLAG_TRACE_BEGIN = 1ULL << 8, + PERF_DLFILTER_FLAG_TRACE_END = 1ULL << 9, + PERF_DLFILTER_FLAG_IN_TX = 1ULL << 10, + PERF_DLFILTER_FLAG_VMENTRY = 1ULL << 11, + PERF_DLFILTER_FLAG_VMEXIT = 1ULL << 12, +}; +---- + +EXAMPLE +------- + +Filter out everything except branches from "foo" to "bar": + +[source,c] +---- +#include <perf/perf_dlfilter.h> +#include <string.h> + +const struct perf_dlfilter_fns perf_dlfilter_fns; + +int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx) +{ + const struct perf_dlfilter_al *al; + const struct perf_dlfilter_al *addr_al; + + if (!sample->ip || !sample->addr_correlates_sym) + return 1; + + al = perf_dlfilter_fns.resolve_ip(ctx); + if (!al || !al->sym || strcmp(al->sym, "foo")) + return 1; + + addr_al = perf_dlfilter_fns.resolve_addr(ctx); + if (!addr_al || !addr_al->sym || strcmp(addr_al->sym, "bar")) + return 1; + + return 0; +} +---- + +To build the shared object, assuming perf has been installed for the local user +i.e. perf_dlfilter.h is in ~/include/perf : + + gcc -c -I ~/include -fpic dlfilter-example.c + gcc -shared -o dlfilter-example.so dlfilter-example.o + +To use the filter with perf script: + + perf script --dlfilter dlfilter-example.so + +SEE ALSO +-------- +linkperf:perf-script[1] diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index a8eccff21281..91108fe3ad5f 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -68,6 +68,16 @@ include::itrace.txt[] --force:: Don't complain, do it. +--vm-time-correlation[=OPTIONS]:: + Some architectures may capture AUX area data which contains timestamps + affected by virtualization. This option will update those timestamps + in place, to correlate with host timestamps. The in-place update means + that an output file is not specified, and instead the input file is + modified. The options are architecture specific, except that they may + start with "dry-run" which will cause the file to be processed but + without updating it. Currently this option is supported only by + Intel PT, refer linkperf:perf-intel-pt[1] + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1], diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 1dcec73c910c..184ba62420f0 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -108,9 +108,9 @@ displayed as follows: perf script --itrace=ibxwpe -F+flags -The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, -system, asynchronous, interrupt, transaction abort, trace begin, trace end, and -in transaction, respectively. +The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional, +system, asynchronous, interrupt, transaction abort, trace begin, trace end, +in transaction, VM-entry, and VM-exit respectively. perf script also supports higher level ways to dump instruction traces: @@ -174,7 +174,11 @@ Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, and to script exported-sql-viewer.py for an example of using the database. There is also script intel-pt-events.py which provides an example of how to -unpack the raw data for power events and PTWRITE. +unpack the raw data for power events and PTWRITE. The script also displays +branches, and supports 2 additional modes selected by option: + + --insn-trace - instruction trace + --src-trace - source trace As mentioned above, it is easy to capture too much data. One way to limit the data captured is to use 'snapshot' mode which is explained further below. @@ -869,6 +873,7 @@ The letters are: L synthesize last branch entries on existing event records s skip initial number of events q quicker (less detailed) decoding + Z prefer to ignore timestamps (so-called "timeless" decoding) "Instructions" events look like they were recorded by "perf record -e instructions". @@ -1062,6 +1067,10 @@ What *will* be decoded with the qq option: - instruction pointer associated with PSB packets +The Z option is equivalent to having recorded a trace without TSC +(i.e. config term tsc=0). It can be useful to avoid timestamp issues when +decoding a trace of a virtual machine. + dump option ~~~~~~~~~~~ @@ -1150,8 +1159,9 @@ include::build-xed.txt[] Tracing Virtual Machines ------------------------ -Currently, only kernel tracing is supported and only with "timeless" decoding -i.e. no TSC timestamps +Currently, only kernel tracing is supported and only with either "timeless" decoding +(i.e. no TSC timestamps) or VM Time Correlation. VM Time Correlation is an extra step +using 'perf inject' and requires unchanging VMX TSC Offset and no VMX TSC Scaling. Other limitations and caveats @@ -1162,7 +1172,7 @@ Other limitations and caveats Guest VCPU is unknown but may be able to be inferred from the host thread Callchains are not supported -Example +Example using "timeless" decoding Start VM @@ -1226,6 +1236,107 @@ perf script can be used to provide an instruction trace :1440 1440 ffffffffbb74603c clockevents_program_event+0x4c ([guest.kernel.kallsyms]) popq %rbx :1440 1440 ffffffffbb74603d clockevents_program_event+0x4d ([guest.kernel.kallsyms]) popq %r12 +Example using VM Time Correlation + +Start VM + + $ sudo virsh start kubuntu20.04 + Domain kubuntu20.04 started + +Mount the guest file system. Note sshfs needs -o direct_io to enable reading of proc files. root access is needed to read /proc/kcore. + + $ mkdir -p vm0 + $ sshfs -o direct_io root@vm0:/ vm0 + +Copy the guest /proc/kallsyms, /proc/modules and /proc/kcore + + $ perf buildid-cache -v --kcore vm0/proc/kcore + same kcore found in /home/user/.debug/[kernel.kcore]/cc9c55a98c5e4ec0aeda69302554aabed5cd6491/2021021312450777 + $ KALLSYMS=/home/user/.debug/\[kernel.kcore\]/cc9c55a98c5e4ec0aeda69302554aabed5cd6491/2021021312450777/kallsyms + +Find the VM process + + $ ps -eLl | grep 'KVM\|PID' + F S UID PID PPID LWP C PRI NI ADDR SZ WCHAN TTY TIME CMD + 3 S 64055 16998 1 17005 13 80 0 - 1818189 - ? 00:00:16 CPU 0/KVM + 3 S 64055 16998 1 17006 4 80 0 - 1818189 - ? 00:00:05 CPU 1/KVM + 3 S 64055 16998 1 17007 3 80 0 - 1818189 - ? 00:00:04 CPU 2/KVM + 3 S 64055 16998 1 17008 4 80 0 - 1818189 - ? 00:00:05 CPU 3/KVM + +Start an open-ended perf record, tracing the VM process, do something on the VM, and then ctrl-C to stop. +IPC can be determined, hence cyc=1 can be added. +Only kernel decoding is supported, so 'k' must be specified. +Intel PT traces both the host and the guest so --guest and --host need to be specified. + + $ sudo perf kvm --guest --host --guestkallsyms $KALLSYMS record --kcore -e intel_pt/cyc=1/k -p 16998 + ^C[ perf record: Woken up 1 times to write data ] + [ perf record: Captured and wrote 9.041 MB perf.data.kvm ] + +Now 'perf inject' can be used to determine the VMX TCS Offset. Note, Intel PT TSC packets are +only 7-bytes, so the TSC Offset might differ from the actual value in the 8th byte. That will +have no effect i.e. the resulting timestamps will be correct anyway. + + $ perf inject -i perf.data.kvm --vm-time-correlation=dry-run + ERROR: Unknown TSC Offset for VMCS 0x1bff6a + VMCS: 0x1bff6a TSC Offset 0xffffe42722c64c41 + ERROR: Unknown TSC Offset for VMCS 0x1cbc08 + VMCS: 0x1cbc08 TSC Offset 0xffffe42722c64c41 + ERROR: Unknown TSC Offset for VMCS 0x1c3ce8 + VMCS: 0x1c3ce8 TSC Offset 0xffffe42722c64c41 + ERROR: Unknown TSC Offset for VMCS 0x1cbce9 + VMCS: 0x1cbce9 TSC Offset 0xffffe42722c64c41 + +Each virtual CPU has a different Virtual Machine Control Structure (VMCS) +shown above with the calculated TSC Offset. For an unchanging TSC Offset +they should all be the same for the same virtual machine. + +Now that the TSC Offset is known, it can be provided to 'perf inject' + + $ perf inject -i perf.data.kvm --vm-time-correlation="dry-run 0xffffe42722c64c41" + +Note the options for 'perf inject' --vm-time-correlation are: + + [ dry-run ] [ <TSC Offset> [ : <VMCS> [ , <VMCS> ]... ] ]... + +So it is possible to specify different TSC Offsets for different VMCS. +The option "dry-run" will cause the file to be processed but without updating it. +Note it is also possible to get a intel_pt.log file by adding option --itrace=d + +There were no errors so, do it for real + + $ perf inject -i perf.data.kvm --vm-time-correlation=0xffffe42722c64c41 --force + +'perf script' can be used to see if there are any decoder errors + + $ perf script -i perf.data.kvm --guestkallsyms $KALLSYMS --itrace=e-o + +There were none. + +'perf script' can be used to provide an instruction trace showing timestamps + + $ perf script -i perf.data.kvm --guestkallsyms $KALLSYMS --insn-trace --xed -F+ipc | grep -C10 vmresume | head -21 + CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cdd __vmx_vcpu_run+0x3d ([kernel.kallsyms]) movq 0x48(%rax), %r9 + CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133ce1 __vmx_vcpu_run+0x41 ([kernel.kallsyms]) movq 0x50(%rax), %r10 + CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133ce5 __vmx_vcpu_run+0x45 ([kernel.kallsyms]) movq 0x58(%rax), %r11 + CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133ce9 __vmx_vcpu_run+0x49 ([kernel.kallsyms]) movq 0x60(%rax), %r12 + CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133ced __vmx_vcpu_run+0x4d ([kernel.kallsyms]) movq 0x68(%rax), %r13 + CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cf1 __vmx_vcpu_run+0x51 ([kernel.kallsyms]) movq 0x70(%rax), %r14 + CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cf5 __vmx_vcpu_run+0x55 ([kernel.kallsyms]) movq 0x78(%rax), %r15 + CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cf9 __vmx_vcpu_run+0x59 ([kernel.kallsyms]) movq (%rax), %rax + CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133cfc __vmx_vcpu_run+0x5c ([kernel.kallsyms]) callq 0xffffffff82133c40 + CPU 1/KVM 17006 [001] 11500.262865593: ffffffff82133c40 vmx_vmenter+0x0 ([kernel.kallsyms]) jz 0xffffffff82133c46 + CPU 1/KVM 17006 [001] 11500.262866075: ffffffff82133c42 vmx_vmenter+0x2 ([kernel.kallsyms]) vmresume IPC: 0.05 (40/769) + :17006 17006 [001] 11500.262869216: ffffffff82200cb0 asm_sysvec_apic_timer_interrupt+0x0 ([guest.kernel.kallsyms]) clac + :17006 17006 [001] 11500.262869216: ffffffff82200cb3 asm_sysvec_apic_timer_interrupt+0x3 ([guest.kernel.kallsyms]) pushq $0xffffffffffffffff + :17006 17006 [001] 11500.262869216: ffffffff82200cb5 asm_sysvec_apic_timer_interrupt+0x5 ([guest.kernel.kallsyms]) callq 0xffffffff82201160 + :17006 17006 [001] 11500.262869216: ffffffff82201160 error_entry+0x0 ([guest.kernel.kallsyms]) cld + :17006 17006 [001] 11500.262869216: ffffffff82201161 error_entry+0x1 ([guest.kernel.kallsyms]) pushq %rsi + :17006 17006 [001] 11500.262869216: ffffffff82201162 error_entry+0x2 ([guest.kernel.kallsyms]) movq 0x8(%rsp), %rsi + :17006 17006 [001] 11500.262869216: ffffffff82201167 error_entry+0x7 ([guest.kernel.kallsyms]) movq %rdi, 0x8(%rsp) + :17006 17006 [001] 11500.262869216: ffffffff8220116c error_entry+0xc ([guest.kernel.kallsyms]) pushq %rdx + :17006 17006 [001] 11500.262869216: ffffffff8220116d error_entry+0xd ([guest.kernel.kallsyms]) pushq %rcx + :17006 17006 [001] 11500.262869216: ffffffff8220116e error_entry+0xe ([guest.kernel.kallsyms]) pushq %rax + SEE ALSO diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index ed3ecfa422e1..080981d38d7b 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -226,7 +226,7 @@ So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And LAZY MATCHING ------------- - The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]). +The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]). e.g. 'a=*' can matches 'a=b', 'a = b', 'a == b' and so on. @@ -235,8 +235,8 @@ This provides some sort of flexibility and robustness to probe point definitions FILTER PATTERN -------------- - The filter pattern is a glob matching pattern(s) to filter variables. - In addition, you can use "!" for specifying filter-out rule. You also can give several rules combined with "&" or "|", and fold those rules as one rule by using "(" ")". +The filter pattern is a glob matching pattern(s) to filter variables. +In addition, you can use "!" for specifying filter-out rule. You also can give several rules combined with "&" or "|", and fold those rules as one rule by using "(" ")". e.g. With --filter "foo* | bar*", perf probe -V shows variables which start with "foo" or "bar". @@ -295,6 +295,19 @@ Add a probe in a source file using special characters by backslash escape ./perf probe -x /opt/test/a.out 'foo\+bar.c:4' +PERMISSIONS AND SYSCTL +---------------------- +Since perf probe depends on ftrace (tracefs) and kallsyms (/proc/kallsyms), you have to care about the permission and some sysctl knobs. + + - Since tracefs and kallsyms requires root or privileged user to access it, the following perf probe commands also require it; --add, --del, --list (except for --cache option) + + - The system admin can remount the tracefs with 755 (`sudo mount -o remount,mode=755 /sys/kernel/tracing/`) to allow unprivileged user to run the perf probe --list command. + + - /proc/sys/kernel/kptr_restrict = 2 (restrict all users) also prevents perf probe to retrieve the important information from kallsyms. You also need to set to 1 (restrict non CAP_SYSLOG users) for the above commands. Since the user-space probe doesn't need to access kallsyms, this is only for probing the kernel function (kprobes). + + - Since the perf probe commands read the vmlinux (for kernel) and/or the debuginfo file (including user-space application), you need to ensure that you can read those files. + + SEE ALSO -------- linkperf:perf-trace[1], linkperf:perf-record[1], linkperf:perf-buildid-cache[1] diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 0fb9eda3cbca..5e43cfa5ea1e 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -550,6 +550,27 @@ def trace_unhandled(event_name, context, event_fields_dict): pass ---- +*process_event*, if defined, is called for any non-tracepoint event + +---- +def process_event(param_dict): + pass +---- + +*context_switch*, if defined, is called for any context switch + +---- +def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x): + pass +---- + +*auxtrace_error*, if defined, is called for any AUX area tracing error + +---- +def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x): + pass +---- + The remaining sections provide descriptions of each of the available built-in perf script Python modules and their associated functions. @@ -592,12 +613,18 @@ common, but need to be made accessible to user scripts nonetheless. perf_trace_context defines a set of functions that can be used to access this data in the context of the current event. Each of these functions expects a context variable, which is the same as the -context variable passed into every event handler as the second -argument. +context variable passed into every tracepoint event handler as the second +argument. For non-tracepoint events, the context variable is also present +as perf_trace_context.perf_script_context . common_pc(context) - returns common_preempt count for the current event common_flags(context) - returns common_flags for the current event common_lock_depth(context) - returns common_lock_depth for the current event + perf_sample_insn(context) - returns the machine code instruction + perf_set_itrace_options(context, itrace_options) - set --itrace options if they have not been set already + perf_sample_srcline(context) - returns source_file_name, line_number + perf_sample_srccode(context) - returns source_file_name, line_number, source_line + Util.py Module ~~~~~~~~~~~~~~ @@ -616,9 +643,20 @@ SUPPORTED FIELDS Currently supported fields: ev_name, comm, pid, tid, cpu, ip, time, period, phys_addr, addr, -symbol, dso, time_enabled, time_running, values, callchain, +symbol, symoff, dso, time_enabled, time_running, values, callchain, brstack, brstacksym, datasrc, datasrc_decode, iregs, uregs, -weight, transaction, raw_buf, attr. +weight, transaction, raw_buf, attr, cpumode. + +Fields that may also be present: + + flags - sample flags + flags_disp - sample flags display + insn_cnt - instruction count for determining instructions-per-cycle (IPC) + cyc_cnt - cycle count for determining IPC + addr_correlates_sym - addr can correlate to a symbol + addr_dso - addr dso + addr_symbol - addr symbol + addr_symoff - addr symbol offset Some fields have sub items: diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 5b8b61075039..aa3a0b2c29a2 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -98,6 +98,18 @@ OPTIONS Generate perf-script.[ext] starter script for given language, using current perf.data. +--dlfilter=<file>:: + Filter sample events using the given shared object file. + Refer linkperf:perf-dlfilter[1] + +--dlarg=<arg>:: + Pass 'arg' as an argument to the dlfilter. --dlarg may be repeated + to add more arguments. + +--list-dlfilters=:: + Display a list of available dlfilters. Use with option -v (must come + before option --list-dlfilters) to show long descriptions. + -a:: Force system-wide collection. Scripts run without a <command> normally use -a by default, while scripts run with a <command> @@ -183,14 +195,15 @@ OPTIONS At this point usage is displayed, and perf-script exits. The flags field is synthesized and may have a value when Instruction - Trace decoding. The flags are "bcrosyiABEx" which stand for branch, + Trace decoding. The flags are "bcrosyiABExgh" which stand for branch, call, return, conditional, system, asynchronous, interrupt, - transaction abort, trace begin, trace end, and in transaction, + transaction abort, trace begin, trace end, in transaction, VM-Entry, and VM-Exit respectively. Known combinations of flags are printed more nicely e.g. "call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b", "int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs", "async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB", - "tr end" for "bE". However the "x" flag will be display separately in those + "tr end" for "bE", "vmentry" for "bcg", "vmexit" for "bch". + However the "x" flag will be displayed separately in those cases e.g. "jcc (x)" for a condition branch within a transaction. The callindent field is synthesized and may have a value when @@ -482,4 +495,5 @@ include::itrace.txt[] SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], -linkperf:perf-script-python[1], linkperf:perf-intel-pt[1] +linkperf:perf-script-python[1], linkperf:perf-intel-pt[1], +linkperf:perf-dlfilter[1] diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index bba5ffb05463..9898a32b8d9c 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -277,6 +277,18 @@ Default is to monitor all CPUS. Record events of type PERF_RECORD_NAMESPACES and display it with the 'cgroup_id' sort key. +-G name:: +--cgroup name:: +monitor only in the container (cgroup) called "name". This option is available only +in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to +container "name" are monitored when they run on the monitored CPUs. Multiple cgroups +can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup +to first event, second cgroup to second event and so on. It is possible to provide +an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have +corresponding events, i.e., they always refer to events defined earlier on the command +line. If the user wants to track multiple events for a specific cgroup, the user can +use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'. + --all-cgroups:: Record events of type PERF_RECORD_CGROUP and display it with the 'cgroup' sort key. diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index 9ee96640744e..e6ff8c898ada 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -402,6 +402,39 @@ struct { u64 clockid_time_ns; }; + HEADER_HYBRID_TOPOLOGY = 30, + +Indicate the hybrid CPUs. The format of data is as below. + +struct { + u32 nr; + struct { + char pmu_name[]; + char cpus[]; + } [nr]; /* Variable length records */ +}; + +Example: + hybrid cpu system: + cpu_core cpu list : 0-15 + cpu_atom cpu list : 16-23 + + HEADER_HYBRID_CPU_PMU_CAPS = 31, + + A list of hybrid CPU PMU capabilities. + +struct { + u32 nr_pmu; + struct { + u32 nr_cpu_pmu_caps; + { + char name[]; + char value[]; + } [nr_cpu_pmu_caps]; + char pmu_name[]; + } [nr_pmu]; +}; + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, |