aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexei Starovoitov <[email protected]>2021-01-14 19:29:58 -0800
committerAlexei Starovoitov <[email protected]>2021-01-14 19:29:59 -0800
commiteed6a9a9571b94acdad98fab2fbf6a92199edf69 (patch)
tree75de6df2421dce6b1859d71c75562be1e70ee1a5
parent7064a7341a0d2fcfeff56be7e3917421fbb8b024 (diff)
parent88a16a1309333e43d328621ece3e9fa37027e8eb (diff)
Merge branch 'perf: Add mmap2 build id support'
Jiri Olsa says: ==================== hi, adding the support to have buildid stored in mmap2 event, so we can bypass the final perf record hunt on build ids. This patchset allows perf to record build ID in mmap2 event, and adds perf tooling to store/download binaries to .debug cache based on these build IDs. Note that the build id retrieval code is stolen from bpf code, where it's been used (together with file offsets) to replace IPs in user space stack traces. It's now added under lib directory. v7 changes: - included only missing kernel patches, cc-ed bpf@vger and rebased on bpf-next/master [Alexei] v6 changes: - last 4 patches rebased Arnaldo's perf/core v5 changes: - rebased on latest perf/core - several patches already pulled in - fixed trace+probe_vfs_getname.sh output redirection - fixed changelogs [Arnaldo] - renamed BUILD_ID_SIZE to BUILD_ID_SIZE_MAX [Song] v4 changes: - fixed typo in changelog [Namhyung] - removed force_download bool from struct dso_store_data, because it's not used [Namhyung] v3 changes: - added acks - removed forgotten debug code [Arnaldo] - fixed readlink termination [Ian] - fixed doc for --debuginfod=URLs [Ian] - adopted kernel's memchr_inv function and used it in build_id__is_defined function [Arnaldo] On recording server: - on the recording server we can run record with --buildid-mmap option to store build ids in mmap2 events: # perf record --buildid-mmap ^C[ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.836 MB perf.data ] - it stores nothing to ~/.debug cache: # find ~/.debug find: ‘/root/.debug’: No such file or directory - and still reports properly: # perf report --stdio ... 99.82% swapper [kernel.kallsyms] [k] native_safe_halt 0.03% swapper [kernel.kallsyms] [k] finish_task_switch 0.02% swapper [kernel.kallsyms] [k] __softirqentry_text_start 0.01% kcompactd0 [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore 0.01% ksoftirqd/6 [kernel.kallsyms] [k] slab_free_freelist_hook 0.01% kworker/17:1H-x [kernel.kallsyms] [k] slab_free_freelist_hook - display used/hit build ids: # perf buildid-list | head -5 5dcec522abf136fcfd3128f47e131f2365834dd7 /proc/kcore 589e403a34f55486bcac848a45e00bcdeedd1ca8 /usr/lib64/libcrypto.so.1.1.1g 94569566d4eac7e9c87ba029d43d4e2158f9527e /usr/lib64/libpthread-2.30.so 559b9702bebe31c6d132c8dc5cc887673d65d5b5 /usr/lib64/libc-2.30.so 40da7abe89f631f60538a17686a7d65c6a02ed31 /usr/lib64/ld-2.30.so - store build id binaries into build id cache: # perf buildid-cache -a perf.data OK 5dcec522abf136fcfd3128f47e131f2365834dd7 /proc/kcore OK 589e403a34f55486bcac848a45e00bcdeedd1ca8 /usr/lib64/libcrypto.so.1.1.1g OK 94569566d4eac7e9c87ba029d43d4e2158f9527e /usr/lib64/libpthread-2.30.so OK 559b9702bebe31c6d132c8dc5cc887673d65d5b5 /usr/lib64/libc-2.30.so OK 40da7abe89f631f60538a17686a7d65c6a02ed31 /usr/lib64/ld-2.30.so OK a674f7a47c78e35a088104647b9640710277b489 /usr/sbin/sshd OK e5cb4ca25f46485bdbc691c3a92e7e111dac3ef2 /usr/bin/bash OK 9bc8589108223c944b452f0819298a0c3cba6215 /usr/bin/find # find ~/.debug | head -5 /root/.debug /root/.debug/proc /root/.debug/proc/kcore /root/.debug/proc/kcore/5dcec522abf136fcfd3128f47e131f2365834dd7 /root/.debug/proc/kcore/5dcec522abf136fcfd3128f47e131f2365834dd7/kallsyms - run debuginfod daemon to provide binaries to another server (below) (the initialization could take some time) # debuginfod -F / On another server: - copy perf.data from 'record' server and run: $ find ~/.debug/ find: ‘/home/jolsa/.debug/’: No such file or directory $ perf buildid-list | head -5 No kallsyms or vmlinux with build-id 5dcec522abf136fcfd3128f47e131f2365834dd7 was found 5dcec522abf136fcfd3128f47e131f2365834dd7 [kernel.kallsyms] 5784f813b727a50cfd3363234aef9fcbab685cc4 /lib/modules/5.10.0-rc2speed+/kernel/fs/xfs/xfs.ko 589e403a34f55486bcac848a45e00bcdeedd1ca8 /usr/lib64/libcrypto.so.1.1.1g 94569566d4eac7e9c87ba029d43d4e2158f9527e /usr/lib64/libpthread-2.30.so 559b9702bebe31c6d132c8dc5cc887673d65d5b5 /usr/lib64/libc-2.30.so - report does not show anything (kernel build id does not match): $ perf report --stdio ... 76.73% swapper [kernel.kallsyms] [k] 0xffffffff81aa8ebe 1.89% find [kernel.kallsyms] [k] 0xffffffff810f2167 0.93% sshd [kernel.kallsyms] [k] 0xffffffff8153380c 0.83% swapper [kernel.kallsyms] [k] 0xffffffff81104b0b 0.71% kworker/u40:2-e [kernel.kallsyms] [k] 0xffffffff810f3850 0.70% kworker/u40:0-e [kernel.kallsyms] [k] 0xffffffff810f3850 0.64% find [kernel.kallsyms] [k] 0xffffffff81a9ba0a 0.63% find [kernel.kallsyms] [k] 0xffffffff81aa93b0 - add build ids does not work, because existing binaries (on another server) have different build ids: $ perf buildid-cache -a perf.data No kallsyms or vmlinux with build-id 5dcec522abf136fcfd3128f47e131f2365834dd7 was found FAIL 5dcec522abf136fcfd3128f47e131f2365834dd7 [kernel.kallsyms] FAIL 5784f813b727a50cfd3363234aef9fcbab685cc4 /lib/modules/5.10.0-rc2speed+/kernel/fs/xfs/xfs.ko FAIL 589e403a34f55486bcac848a45e00bcdeedd1ca8 /usr/lib64/libcrypto.so.1.1.1g FAIL 94569566d4eac7e9c87ba029d43d4e2158f9527e /usr/lib64/libpthread-2.30.so FAIL 559b9702bebe31c6d132c8dc5cc887673d65d5b5 /usr/lib64/libc-2.30.so FAIL 40da7abe89f631f60538a17686a7d65c6a02ed31 /usr/lib64/ld-2.30.so FAIL a674f7a47c78e35a088104647b9640710277b489 /usr/sbin/sshd FAIL e5cb4ca25f46485bdbc691c3a92e7e111dac3ef2 /usr/bin/bash FAIL 9bc8589108223c944b452f0819298a0c3cba6215 /usr/bin/find - add build ids with debuginfod setup pointing to record server: $ perf buildid-cache -a perf.data --debuginfod http://192.168.122.174:8002 No kallsyms or vmlinux with build-id 5dcec522abf136fcfd3128f47e131f2365834dd7 was found OK 5dcec522abf136fcfd3128f47e131f2365834dd7 [kernel.kallsyms] OK 5784f813b727a50cfd3363234aef9fcbab685cc4 /lib/modules/5.10.0-rc2speed+/kernel/fs/xfs/xfs.ko OK 589e403a34f55486bcac848a45e00bcdeedd1ca8 /usr/lib64/libcrypto.so.1.1.1g OK 94569566d4eac7e9c87ba029d43d4e2158f9527e /usr/lib64/libpthread-2.30.so OK 559b9702bebe31c6d132c8dc5cc887673d65d5b5 /usr/lib64/libc-2.30.so OK 40da7abe89f631f60538a17686a7d65c6a02ed31 /usr/lib64/ld-2.30.so OK a674f7a47c78e35a088104647b9640710277b489 /usr/sbin/sshd OK e5cb4ca25f46485bdbc691c3a92e7e111dac3ef2 /usr/bin/bash OK 9bc8589108223c944b452f0819298a0c3cba6215 /usr/bin/find - and report works: $ perf report --stdio ... 76.73% swapper [kernel.kallsyms] [k] native_safe_halt 1.91% find [kernel.kallsyms] [k] queue_work_on 0.93% sshd [kernel.kallsyms] [k] iowrite16 0.83% swapper [kernel.kallsyms] [k] finish_task_switch 0.72% kworker/u40:2-e [kernel.kallsyms] [k] process_one_work 0.70% kworker/u40:0-e [kernel.kallsyms] [k] process_one_work 0.64% find [kernel.kallsyms] [k] syscall_enter_from_user_mode 0.63% find [kernel.kallsyms] [k] _raw_spin_unlock_irqrestore - because we have the data in build id cache: $ find ~/.debug | head -10 .../.debug .../.debug/home .../.debug/home/jolsa .../.debug/home/jolsa/.cache .../.debug/home/jolsa/.cache/debuginfod_client .../.debug/home/jolsa/.cache/debuginfod_client/5dcec522abf136fcfd3128f47e131f2365834dd7 .../.debug/home/jolsa/.cache/debuginfod_client/5dcec522abf136fcfd3128f47e131f2365834dd7/executable .../.debug/home/jolsa/.cache/debuginfod_client/5dcec522abf136fcfd3128f47e131f2365834dd7/executable/5dcec522abf136fcfd3128f47e131f2365834dd7 .../.debug/home/jolsa/.cache/debuginfod_client/5dcec522abf136fcfd3128f47e131f2365834dd7/executable/5dcec522abf136fcfd3128f47e131f2365834dd7/elf .../.debug/home/jolsa/.cache/debuginfod_client/5dcec522abf136fcfd3128f47e131f2365834dd7/executable/5dcec522abf136fcfd3128f47e131f2365834dd7/debug Available also in: git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git perf/build_id thanks, jirka ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
-rw-r--r--include/linux/buildid.h12
-rw-r--r--include/uapi/linux/perf_event.h42
-rw-r--r--kernel/bpf/stackmap.c143
-rw-r--r--kernel/events/core.c32
-rw-r--r--lib/Makefile3
-rw-r--r--lib/buildid.c149
6 files changed, 232 insertions, 149 deletions
diff --git a/include/linux/buildid.h b/include/linux/buildid.h
new file mode 100644
index 000000000000..40232f90db6e
--- /dev/null
+++ b/include/linux/buildid.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BUILDID_H
+#define _LINUX_BUILDID_H
+
+#include <linux/mm_types.h>
+
+#define BUILD_ID_SIZE_MAX 20
+
+int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
+ __u32 *size);
+
+#endif
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b15e3447cd9f..cb6f84103560 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -386,7 +386,8 @@ struct perf_event_attr {
aux_output : 1, /* generate AUX records instead of events */
cgroup : 1, /* include cgroup events */
text_poke : 1, /* include text poke events */
- __reserved_1 : 30;
+ build_id : 1, /* use build id in mmap2 events */
+ __reserved_1 : 29;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -659,6 +660,22 @@ struct perf_event_mmap_page {
__u64 aux_size;
};
+/*
+ * The current state of perf_event_header::misc bits usage:
+ * ('|' used bit, '-' unused bit)
+ *
+ * 012 CDEF
+ * |||---------||||
+ *
+ * Where:
+ * 0-2 CPUMODE_MASK
+ *
+ * C PROC_MAP_PARSE_TIMEOUT
+ * D MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT
+ * E MMAP_BUILD_ID / EXACT_IP / SCHED_OUT_PREEMPT
+ * F (reserved)
+ */
+
#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0)
#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
#define PERF_RECORD_MISC_KERNEL (1 << 0)
@@ -690,6 +707,7 @@ struct perf_event_mmap_page {
*
* PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events
* PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
+ * PERF_RECORD_MISC_MMAP_BUILD_ID - PERF_RECORD_MMAP2 event
*
*
* PERF_RECORD_MISC_EXACT_IP:
@@ -699,9 +717,13 @@ struct perf_event_mmap_page {
*
* PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
* Indicates that thread was preempted in TASK_RUNNING state.
+ *
+ * PERF_RECORD_MISC_MMAP_BUILD_ID:
+ * Indicates that mmap2 event carries build id data.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
+#define PERF_RECORD_MISC_MMAP_BUILD_ID (1 << 14)
/*
* Reserve the last bit to indicate some extended misc field
*/
@@ -915,10 +937,20 @@ enum perf_event_type {
* u64 addr;
* u64 len;
* u64 pgoff;
- * u32 maj;
- * u32 min;
- * u64 ino;
- * u64 ino_generation;
+ * union {
+ * struct {
+ * u32 maj;
+ * u32 min;
+ * u64 ino;
+ * u64 ino_generation;
+ * };
+ * struct {
+ * u8 build_id_size;
+ * u8 __reserved_1;
+ * u16 __reserved_2;
+ * u8 build_id[20];
+ * };
+ * };
* u32 prot, flags;
* char filename[];
* struct sample_id sample_id;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index aea96b638473..cabaf7db8efc 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -7,10 +7,9 @@
#include <linux/kernel.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
-#include <linux/elf.h>
-#include <linux/pagemap.h>
#include <linux/irq_work.h>
#include <linux/btf_ids.h>
+#include <linux/buildid.h>
#include "percpu_freelist.h"
#define STACK_CREATE_FLAG_MASK \
@@ -143,140 +142,6 @@ free_smap:
return ERR_PTR(err);
}
-#define BPF_BUILD_ID 3
-/*
- * Parse build id from the note segment. This logic can be shared between
- * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are
- * identical.
- */
-static inline int stack_map_parse_build_id(void *page_addr,
- unsigned char *build_id,
- void *note_start,
- Elf32_Word note_size)
-{
- Elf32_Word note_offs = 0, new_offs;
-
- /* check for overflow */
- if (note_start < page_addr || note_start + note_size < note_start)
- return -EINVAL;
-
- /* only supports note that fits in the first page */
- if (note_start + note_size > page_addr + PAGE_SIZE)
- return -EINVAL;
-
- while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
- Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
-
- if (nhdr->n_type == BPF_BUILD_ID &&
- nhdr->n_namesz == sizeof("GNU") &&
- nhdr->n_descsz > 0 &&
- nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
- memcpy(build_id,
- note_start + note_offs +
- ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
- nhdr->n_descsz);
- memset(build_id + nhdr->n_descsz, 0,
- BPF_BUILD_ID_SIZE - nhdr->n_descsz);
- return 0;
- }
- new_offs = note_offs + sizeof(Elf32_Nhdr) +
- ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
- if (new_offs <= note_offs) /* overflow */
- break;
- note_offs = new_offs;
- }
- return -EINVAL;
-}
-
-/* Parse build ID from 32-bit ELF */
-static int stack_map_get_build_id_32(void *page_addr,
- unsigned char *build_id)
-{
- Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
- Elf32_Phdr *phdr;
- int i;
-
- /* only supports phdr that fits in one page */
- if (ehdr->e_phnum >
- (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
- return -EINVAL;
-
- phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
-
- for (i = 0; i < ehdr->e_phnum; ++i) {
- if (phdr[i].p_type == PT_NOTE &&
- !stack_map_parse_build_id(page_addr, build_id,
- page_addr + phdr[i].p_offset,
- phdr[i].p_filesz))
- return 0;
- }
- return -EINVAL;
-}
-
-/* Parse build ID from 64-bit ELF */
-static int stack_map_get_build_id_64(void *page_addr,
- unsigned char *build_id)
-{
- Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
- Elf64_Phdr *phdr;
- int i;
-
- /* only supports phdr that fits in one page */
- if (ehdr->e_phnum >
- (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
- return -EINVAL;
-
- phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
-
- for (i = 0; i < ehdr->e_phnum; ++i) {
- if (phdr[i].p_type == PT_NOTE &&
- !stack_map_parse_build_id(page_addr, build_id,
- page_addr + phdr[i].p_offset,
- phdr[i].p_filesz))
- return 0;
- }
- return -EINVAL;
-}
-
-/* Parse build ID of ELF file mapped to vma */
-static int stack_map_get_build_id(struct vm_area_struct *vma,
- unsigned char *build_id)
-{
- Elf32_Ehdr *ehdr;
- struct page *page;
- void *page_addr;
- int ret;
-
- /* only works for page backed storage */
- if (!vma->vm_file)
- return -EINVAL;
-
- page = find_get_page(vma->vm_file->f_mapping, 0);
- if (!page)
- return -EFAULT; /* page not mapped */
-
- ret = -EINVAL;
- page_addr = kmap_atomic(page);
- ehdr = (Elf32_Ehdr *)page_addr;
-
- /* compare magic x7f "ELF" */
- if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
- goto out;
-
- /* only support executable file and shared object file */
- if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
- goto out;
-
- if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
- ret = stack_map_get_build_id_32(page_addr, build_id);
- else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
- ret = stack_map_get_build_id_64(page_addr, build_id);
-out:
- kunmap_atomic(page_addr);
- put_page(page);
- return ret;
-}
-
static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
u64 *ips, u32 trace_nr, bool user)
{
@@ -317,18 +182,18 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
for (i = 0; i < trace_nr; i++) {
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
- memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
+ memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
}
return;
}
for (i = 0; i < trace_nr; i++) {
vma = find_vma(current->mm, ips[i]);
- if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) {
+ if (!vma || build_id_parse(vma, id_offs[i].build_id, NULL)) {
/* per entry fall back to ips */
id_offs[i].status = BPF_STACK_BUILD_ID_IP;
id_offs[i].ip = ips[i];
- memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
+ memset(id_offs[i].build_id, 0, BUILD_ID_SIZE_MAX);
continue;
}
id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 55d18791a72d..c37401e3e5f7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -53,6 +53,7 @@
#include <linux/min_heap.h>
#include <linux/highmem.h>
#include <linux/pgtable.h>
+#include <linux/buildid.h>
#include "internal.h"
@@ -397,6 +398,7 @@ static atomic_t nr_ksymbol_events __read_mostly;
static atomic_t nr_bpf_events __read_mostly;
static atomic_t nr_cgroup_events __read_mostly;
static atomic_t nr_text_poke_events __read_mostly;
+static atomic_t nr_build_id_events __read_mostly;
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
@@ -4673,6 +4675,8 @@ static void unaccount_event(struct perf_event *event)
dec = true;
if (event->attr.mmap || event->attr.mmap_data)
atomic_dec(&nr_mmap_events);
+ if (event->attr.build_id)
+ atomic_dec(&nr_build_id_events);
if (event->attr.comm)
atomic_dec(&nr_comm_events);
if (event->attr.namespaces)
@@ -8046,6 +8050,8 @@ struct perf_mmap_event {
u64 ino;
u64 ino_generation;
u32 prot, flags;
+ u8 build_id[BUILD_ID_SIZE_MAX];
+ u32 build_id_size;
struct {
struct perf_event_header header;
@@ -8077,6 +8083,7 @@ static void perf_event_mmap_output(struct perf_event *event,
struct perf_sample_data sample;
int size = mmap_event->event_id.header.size;
u32 type = mmap_event->event_id.header.type;
+ bool use_build_id;
int ret;
if (!perf_event_mmap_match(event, data))
@@ -8101,13 +8108,25 @@ static void perf_event_mmap_output(struct perf_event *event,
mmap_event->event_id.pid = perf_event_pid(event, current);
mmap_event->event_id.tid = perf_event_tid(event, current);
+ use_build_id = event->attr.build_id && mmap_event->build_id_size;
+
+ if (event->attr.mmap2 && use_build_id)
+ mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_BUILD_ID;
+
perf_output_put(&handle, mmap_event->event_id);
if (event->attr.mmap2) {
- perf_output_put(&handle, mmap_event->maj);
- perf_output_put(&handle, mmap_event->min);
- perf_output_put(&handle, mmap_event->ino);
- perf_output_put(&handle, mmap_event->ino_generation);
+ if (use_build_id) {
+ u8 size[4] = { (u8) mmap_event->build_id_size, 0, 0, 0 };
+
+ __output_copy(&handle, size, 4);
+ __output_copy(&handle, mmap_event->build_id, BUILD_ID_SIZE_MAX);
+ } else {
+ perf_output_put(&handle, mmap_event->maj);
+ perf_output_put(&handle, mmap_event->min);
+ perf_output_put(&handle, mmap_event->ino);
+ perf_output_put(&handle, mmap_event->ino_generation);
+ }
perf_output_put(&handle, mmap_event->prot);
perf_output_put(&handle, mmap_event->flags);
}
@@ -8236,6 +8255,9 @@ got_name:
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
+ if (atomic_read(&nr_build_id_events))
+ build_id_parse(vma, mmap_event->build_id, &mmap_event->build_id_size);
+
perf_iterate_sb(perf_event_mmap_output,
mmap_event,
NULL);
@@ -11172,6 +11194,8 @@ static void account_event(struct perf_event *event)
inc = true;
if (event->attr.mmap || event->attr.mmap_data)
atomic_inc(&nr_mmap_events);
+ if (event->attr.build_id)
+ atomic_inc(&nr_build_id_events);
if (event->attr.comm)
atomic_inc(&nr_comm_events);
if (event->attr.namespaces)
diff --git a/lib/Makefile b/lib/Makefile
index afeff05fa8c5..a6b160c3a4fa 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -36,7 +36,8 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
- nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o
+ nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o \
+ buildid.o
lib-$(CONFIG_PRINTK) += dump_stack.o
lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/buildid.c b/lib/buildid.c
new file mode 100644
index 000000000000..6156997c3895
--- /dev/null
+++ b/lib/buildid.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/buildid.h>
+#include <linux/elf.h>
+#include <linux/pagemap.h>
+
+#define BUILD_ID 3
+/*
+ * Parse build id from the note segment. This logic can be shared between
+ * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are
+ * identical.
+ */
+static inline int parse_build_id(void *page_addr,
+ unsigned char *build_id,
+ __u32 *size,
+ void *note_start,
+ Elf32_Word note_size)
+{
+ Elf32_Word note_offs = 0, new_offs;
+
+ /* check for overflow */
+ if (note_start < page_addr || note_start + note_size < note_start)
+ return -EINVAL;
+
+ /* only supports note that fits in the first page */
+ if (note_start + note_size > page_addr + PAGE_SIZE)
+ return -EINVAL;
+
+ while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
+ Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
+
+ if (nhdr->n_type == BUILD_ID &&
+ nhdr->n_namesz == sizeof("GNU") &&
+ nhdr->n_descsz > 0 &&
+ nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
+ memcpy(build_id,
+ note_start + note_offs +
+ ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
+ nhdr->n_descsz);
+ memset(build_id + nhdr->n_descsz, 0,
+ BUILD_ID_SIZE_MAX - nhdr->n_descsz);
+ if (size)
+ *size = nhdr->n_descsz;
+ return 0;
+ }
+ new_offs = note_offs + sizeof(Elf32_Nhdr) +
+ ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
+ if (new_offs <= note_offs) /* overflow */
+ break;
+ note_offs = new_offs;
+ }
+ return -EINVAL;
+}
+
+/* Parse build ID from 32-bit ELF */
+static int get_build_id_32(void *page_addr, unsigned char *build_id,
+ __u32 *size)
+{
+ Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
+ Elf32_Phdr *phdr;
+ int i;
+
+ /* only supports phdr that fits in one page */
+ if (ehdr->e_phnum >
+ (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
+ return -EINVAL;
+
+ phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
+
+ for (i = 0; i < ehdr->e_phnum; ++i) {
+ if (phdr[i].p_type == PT_NOTE &&
+ !parse_build_id(page_addr, build_id, size,
+ page_addr + phdr[i].p_offset,
+ phdr[i].p_filesz))
+ return 0;
+ }
+ return -EINVAL;
+}
+
+/* Parse build ID from 64-bit ELF */
+static int get_build_id_64(void *page_addr, unsigned char *build_id,
+ __u32 *size)
+{
+ Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
+ Elf64_Phdr *phdr;
+ int i;
+
+ /* only supports phdr that fits in one page */
+ if (ehdr->e_phnum >
+ (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
+ return -EINVAL;
+
+ phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
+
+ for (i = 0; i < ehdr->e_phnum; ++i) {
+ if (phdr[i].p_type == PT_NOTE &&
+ !parse_build_id(page_addr, build_id, size,
+ page_addr + phdr[i].p_offset,
+ phdr[i].p_filesz))
+ return 0;
+ }
+ return -EINVAL;
+}
+
+/*
+ * Parse build ID of ELF file mapped to vma
+ * @vma: vma object
+ * @build_id: buffer to store build id, at least BUILD_ID_SIZE long
+ * @size: returns actual build id size in case of success
+ *
+ * Returns 0 on success, otherwise error (< 0).
+ */
+int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
+ __u32 *size)
+{
+ Elf32_Ehdr *ehdr;
+ struct page *page;
+ void *page_addr;
+ int ret;
+
+ /* only works for page backed storage */
+ if (!vma->vm_file)
+ return -EINVAL;
+
+ page = find_get_page(vma->vm_file->f_mapping, 0);
+ if (!page)
+ return -EFAULT; /* page not mapped */
+
+ ret = -EINVAL;
+ page_addr = kmap_atomic(page);
+ ehdr = (Elf32_Ehdr *)page_addr;
+
+ /* compare magic x7f "ELF" */
+ if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
+ goto out;
+
+ /* only support executable file and shared object file */
+ if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
+ goto out;
+
+ if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
+ ret = get_build_id_32(page_addr, build_id, size);
+ else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
+ ret = get_build_id_64(page_addr, build_id, size);
+out:
+ kunmap_atomic(page_addr);
+ put_page(page);
+ return ret;
+}