51 files changed, 1981 insertions, 108 deletions
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index 8c10955eff93..3ef07a12aa14 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -9,7 +9,7 @@ size=128
 config=0
 sample_period=*
 sample_type=263
-read_format=0|4
+read_format=0|4|20
 disabled=1
 inherit=1
 pinned=0
diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy
index 86a15dd359d9..8fec06eda5f9 100644
--- a/tools/perf/tests/attr/system-wide-dummy
+++ b/tools/perf/tests/attr/system-wide-dummy
@@ -11,7 +11,7 @@ size=128
 config=9
 sample_period=4000
 sample_type=455
-read_format=4
+read_format=4|20
 # Event will be enabled right away.
 disabled=0
 inherit=1
diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group
index 14ee60fd3f41..6c1cff8aae8b 100644
--- a/tools/perf/tests/attr/test-record-group
+++ b/tools/perf/tests/attr/test-record-group
@@ -7,14 +7,14 @@ ret     = 1
 fd=1
 group_fd=-1
 sample_type=327
-read_format=4
+read_format=4|20
 
 [event-2:base-record]
 fd=2
 group_fd=1
 config=1
 sample_type=327
-read_format=4
+read_format=4|20
 mmap=0
 comm=0
 task=0
diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling
index 300b9f7e6d69..97e7e64a38f0 100644
--- a/tools/perf/tests/attr/test-record-group-sampling
+++ b/tools/perf/tests/attr/test-record-group-sampling
@@ -7,7 +7,7 @@ ret     = 1
 fd=1
 group_fd=-1
 sample_type=343
-read_format=12
+read_format=12|28
 inherit=0
 
 [event-2:base-record]
@@ -21,8 +21,8 @@ config=3
 # default | PERF_SAMPLE_READ
 sample_type=343
 
-# PERF_FORMAT_ID | PERF_FORMAT_GROUP
-read_format=12
+# PERF_FORMAT_ID | PERF_FORMAT_GROUP  | PERF_FORMAT_LOST
+read_format=12|28
 task=0
 mmap=0
 comm=0
diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/attr/test-record-group1
index 3ffe246e0228..eeb1db392bc9 100644
--- a/tools/perf/tests/attr/test-record-group1
+++ b/tools/perf/tests/attr/test-record-group1
@@ -7,7 +7,7 @@ ret     = 1
 fd=1
 group_fd=-1
 sample_type=327
-read_format=4
+read_format=4|20
 
 [event-2:base-record]
 fd=2
@@ -15,7 +15,7 @@ group_fd=1
 type=0
 config=1
 sample_type=327
-read_format=4
+read_format=4|20
 mmap=0
 comm=0
 task=0
diff --git a/tools/perf/tests/attr/test-record-group2 b/tools/perf/tests/attr/test-record-group2
index 6b9f8d182ce1..cebdaa8e64e4 100644
--- a/tools/perf/tests/attr/test-record-group2
+++ b/tools/perf/tests/attr/test-record-group2
@@ -9,7 +9,7 @@ group_fd=-1
 config=0|1
 sample_period=1234000
 sample_type=87
-read_format=12
+read_format=12|28
 inherit=0
 freq=0
 
@@ -19,7 +19,7 @@ group_fd=1
 config=0|1
 sample_period=6789000
 sample_type=87
-read_format=12
+read_format=12|28
 disabled=0
 inherit=0
 mmap=0
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index f94929ebb54b..7c873c6ae3eb 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -17,29 +17,31 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused,
 			 struct machine *machine __maybe_unused)
 {
 	struct perf_record_cpu_map *map_event = &event->cpu_map;
-	struct perf_record_record_cpu_map *mask;
 	struct perf_record_cpu_map_data *data;
 	struct perf_cpu_map *map;
-	int i;
+	unsigned int long_size;
 
 	data = &map_event->data;
 
 	TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__MASK);
 
-	mask = (struct perf_record_record_cpu_map *)data->data;
+	long_size = data->mask32_data.long_size;
 
-	TEST_ASSERT_VAL("wrong nr",   mask->nr == 1);
+	TEST_ASSERT_VAL("wrong long_size", long_size == 4 || long_size == 8);
 
-	for (i = 0; i < 20; i++) {
-		TEST_ASSERT_VAL("wrong cpu", test_bit(i, mask->mask));
-	}
+	TEST_ASSERT_VAL("wrong nr",   data->mask32_data.nr == 1);
+
+	TEST_ASSERT_VAL("wrong cpu", perf_record_cpu_map_data__test_bit(0, data));
+	TEST_ASSERT_VAL("wrong cpu", !perf_record_cpu_map_data__test_bit(1, data));
+	for (int i = 2; i <= 20; i++)
+		TEST_ASSERT_VAL("wrong cpu", perf_record_cpu_map_data__test_bit(i, data));
 
 	map = cpu_map__new_data(data);
 	TEST_ASSERT_VAL("wrong nr",  perf_cpu_map__nr(map) == 20);
 
-	for (i = 0; i < 20; i++) {
-		TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, i).cpu == i);
-	}
+	TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, 0).cpu == 0);
+	for (int i = 2; i <= 20; i++)
+		TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, i - 1).cpu == i);
 
 	perf_cpu_map__put(map);
 	return 0;
@@ -51,7 +53,6 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
 			 struct machine *machine __maybe_unused)
 {
 	struct perf_record_cpu_map *map_event = &event->cpu_map;
-	struct cpu_map_entries *cpus;
 	struct perf_record_cpu_map_data *data;
 	struct perf_cpu_map *map;
 
@@ -59,11 +60,9 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
 
 	TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__CPUS);
 
-	cpus = (struct cpu_map_entries *)data->data;
-
-	TEST_ASSERT_VAL("wrong nr",   cpus->nr == 2);
-	TEST_ASSERT_VAL("wrong cpu",  cpus->cpu[0] == 1);
-	TEST_ASSERT_VAL("wrong cpu",  cpus->cpu[1] == 256);
+	TEST_ASSERT_VAL("wrong nr",   data->cpus_data.nr == 2);
+	TEST_ASSERT_VAL("wrong cpu",  data->cpus_data.cpu[0] == 1);
+	TEST_ASSERT_VAL("wrong cpu",  data->cpus_data.cpu[1] == 256);
 
 	map = cpu_map__new_data(data);
 	TEST_ASSERT_VAL("wrong nr",  perf_cpu_map__nr(map) == 2);
@@ -74,26 +73,60 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
+static int process_event_range_cpus(struct perf_tool *tool __maybe_unused,
+				union perf_event *event,
+				struct perf_sample *sample __maybe_unused,
+				struct machine *machine __maybe_unused)
+{
+	struct perf_record_cpu_map *map_event = &event->cpu_map;
+	struct perf_record_cpu_map_data *data;
+	struct perf_cpu_map *map;
+
+	data = &map_event->data;
+
+	TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__RANGE_CPUS);
+
+	TEST_ASSERT_VAL("wrong any_cpu",   data->range_cpu_data.any_cpu == 0);
+	TEST_ASSERT_VAL("wrong start_cpu", data->range_cpu_data.start_cpu == 1);
+	TEST_ASSERT_VAL("wrong end_cpu",   data->range_cpu_data.end_cpu == 256);
+
+	map = cpu_map__new_data(data);
+	TEST_ASSERT_VAL("wrong nr",  perf_cpu_map__nr(map) == 256);
+	TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, 0).cpu == 1);
+	TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__max(map).cpu == 256);
+	TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1);
+	perf_cpu_map__put(map);
+	return 0;
+}
+
 
 static int test__cpu_map_synthesize(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 	struct perf_cpu_map *cpus;
 
-	/* This one is better stores in mask. */
-	cpus = perf_cpu_map__new("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19");
+	/* This one is better stored in a mask. */
+	cpus = perf_cpu_map__new("0,2-20");
 
 	TEST_ASSERT_VAL("failed to synthesize map",
 		!perf_event__synthesize_cpu_map(NULL, cpus, process_event_mask, NULL));
 
 	perf_cpu_map__put(cpus);
 
-	/* This one is better stores in cpu values. */
+	/* This one is better stored in cpu values. */
 	cpus = perf_cpu_map__new("1,256");
 
 	TEST_ASSERT_VAL("failed to synthesize map",
 		!perf_event__synthesize_cpu_map(NULL, cpus, process_event_cpus, NULL));
 
 	perf_cpu_map__put(cpus);
+
+	/* This one is better stored as a range. */
+	cpus = perf_cpu_map__new("1-256");
+
+	TEST_ASSERT_VAL("failed to synthesize map",
+		!perf_event__synthesize_cpu_map(NULL, cpus, process_event_range_cpus, NULL));
+
+	perf_cpu_map__put(cpus);
 	return 0;
 }
 
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index 78db4d704e76..d093a9b878d1 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -21,7 +21,7 @@ static int process_event_unit(struct perf_tool *tool __maybe_unused,
 
 	TEST_ASSERT_VAL("wrong id", ev->id == 123);
 	TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__UNIT);
-	TEST_ASSERT_VAL("wrong unit", !strcmp(ev->data, "KRAVA"));
+	TEST_ASSERT_VAL("wrong unit", !strcmp(ev->unit, "KRAVA"));
 	return 0;
 }
 
@@ -31,13 +31,10 @@ static int process_event_scale(struct perf_tool *tool __maybe_unused,
 			       struct machine *machine __maybe_unused)
 {
 	struct perf_record_event_update *ev = (struct perf_record_event_update *)event;
-	struct perf_record_event_update_scale *ev_data;
-
-	ev_data = (struct perf_record_event_update_scale *)ev->data;
 
 	TEST_ASSERT_VAL("wrong id", ev->id == 123);
 	TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__SCALE);
-	TEST_ASSERT_VAL("wrong scale", ev_data->scale == 0.123);
+	TEST_ASSERT_VAL("wrong scale", ev->scale.scale == 0.123);
 	return 0;
 }
 
@@ -56,7 +53,7 @@ static int process_event_name(struct perf_tool *tool,
 
 	TEST_ASSERT_VAL("wrong id", ev->id == 123);
 	TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__NAME);
-	TEST_ASSERT_VAL("wrong name", !strcmp(ev->data, tmp->name));
+	TEST_ASSERT_VAL("wrong name", !strcmp(ev->name, tmp->name));
 	return 0;
 }
 
@@ -66,12 +63,9 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
 			      struct machine *machine __maybe_unused)
 {
 	struct perf_record_event_update *ev = (struct perf_record_event_update *)event;
-	struct perf_record_event_update_cpus *ev_data;
 	struct perf_cpu_map *map;
 
-	ev_data = (struct perf_record_event_update_cpus *) ev->data;
-
-	map = cpu_map__new_data(&ev_data->cpus);
+	map = cpu_map__new_data(&ev->cpus.cpus);
 
 	TEST_ASSERT_VAL("wrong id", ev->id == 123);
 	TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS);
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 2efe9e3a63b8..6512f5e22045 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include "util/cputopo.h"
 #include "util/debug.h"
 #include "util/expr.h"
 #include "util/header.h"
@@ -94,6 +95,10 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
 	ret |= test(ctx, "min(1,2) + 1", 2);
 	ret |= test(ctx, "max(1,2) + 1", 3);
 	ret |= test(ctx, "1+1 if 3*4 else 0", 2);
+	ret |= test(ctx, "100 if 1 else 200 if 1 else 300", 100);
+	ret |= test(ctx, "100 if 0 else 200 if 1 else 300", 200);
+	ret |= test(ctx, "100 if 1 else 200 if 0 else 300", 100);
+	ret |= test(ctx, "100 if 0 else 200 if 0 else 300", 300);
 	ret |= test(ctx, "1.1 + 2.1", 3.2);
 	ret |= test(ctx, ".1 + 2.", 2.1);
 	ret |= test(ctx, "d_ratio(1, 2)", 0.5);
@@ -133,7 +138,7 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
 						    (void **)&val_ptr));
 
 	expr__ctx_clear(ctx);
-	ctx->runtime = 3;
+	ctx->sctx.runtime = 3;
 	TEST_ASSERT_VAL("find ids",
 			expr__find_ids("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@",
 					NULL, ctx) == 0);
@@ -154,15 +159,33 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
 						    (void **)&val_ptr));
 
 	/* Only EVENT1 or EVENT2 need be measured depending on the value of smt_on. */
-	expr__ctx_clear(ctx);
-	TEST_ASSERT_VAL("find ids",
-			expr__find_ids("EVENT1 if #smt_on else EVENT2",
-				NULL, ctx) == 0);
-	TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1);
-	TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids,
-						  smt_on() ? "EVENT1" : "EVENT2",
-						  (void **)&val_ptr));
+	{
+		struct cpu_topology *topology = cpu_topology__new();
+		bool smton = smt_on(topology);
+		bool corewide = core_wide(/*system_wide=*/false,
+					  /*user_requested_cpus=*/false,
+					  topology);
+
+		cpu_topology__delete(topology);
+		expr__ctx_clear(ctx);
+		TEST_ASSERT_VAL("find ids",
+				expr__find_ids("EVENT1 if #smt_on else EVENT2",
+					NULL, ctx) == 0);
+		TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1);
+		TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids,
+							  smton ? "EVENT1" : "EVENT2",
+							  (void **)&val_ptr));
+
+		expr__ctx_clear(ctx);
+		TEST_ASSERT_VAL("find ids",
+				expr__find_ids("EVENT1 if #core_wide else EVENT2",
+					NULL, ctx) == 0);
+		TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1);
+		TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids,
+							  corewide ? "EVENT1" : "EVENT2",
+							  (void **)&val_ptr));
 
+	}
 	/* The expression is a constant 1.0 without needing to evaluate EVENT1. */
 	expr__ctx_clear(ctx);
 	TEST_ASSERT_VAL("find ids",
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index dfb6173b2a82..8322fc2295fa 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -1,8 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <errno.h>
 #include <inttypes.h>
-/* For the CLR_() macros */
-#include <pthread.h>
 #include <stdlib.h>
 #include <perf/cpumap.h>
 
@@ -114,8 +112,7 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest
 
 	for (i = 0; i < nsyscalls; ++i)
 		for (j = 0; j < expected_nr_events[i]; ++j) {
-			int foo = syscalls[i]();
-			++foo;
+			syscalls[i]();
 		}
 
 	md = &evlist->mmap[0];
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index 90828ae03ef5..f3275be83a33 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -2,7 +2,7 @@
 #include <errno.h>
 #include <inttypes.h>
 /* For the CPU_* macros */
-#include <pthread.h>
+#include <sched.h>
 
 #include <sys/types.h>
 #include <sys/stat.h>
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 6a001fcfed68..7aa946aa886d 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -2,8 +2,6 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <linux/string.h>
-/* For the CLR_() macros */
-#include <pthread.h>
 
 #include <sched.h>
 #include <perf/mmap.h>
@@ -332,7 +330,7 @@ out_delete_evlist:
 out:
 	if (err == -EACCES)
 		return TEST_SKIP;
-	if (err < 0)
+	if (err < 0 || errs != 0)
 		return TEST_FAIL;
 	return TEST_OK;
 }
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 07f2411b0ad4..20930dd48ee0 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -86,10 +86,15 @@ static bool samples_same(const struct perf_sample *s1,
 			COMP(read.time_running);
 		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
 		if (read_format & PERF_FORMAT_GROUP) {
-			for (i = 0; i < s1->read.group.nr; i++)
-				MCOMP(read.group.values[i]);
+			for (i = 0; i < s1->read.group.nr; i++) {
+				/* FIXME: check values without LOST */
+				if (read_format & PERF_FORMAT_LOST)
+					MCOMP(read.group.values[i]);
+			}
 		} else {
 			COMP(read.one.id);
+			if (read_format & PERF_FORMAT_LOST)
+				COMP(read.one.lost);
 		}
 	}
 
@@ -263,7 +268,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
 			.data	= (void *)aux_data,
 		},
 	};
-	struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},};
+	struct sample_read_value values[] = {{1, 5, 0}, {9, 3, 0}, {2, 7, 0}, {6, 4, 1},};
 	struct perf_sample sample_out, sample_out_endian;
 	size_t i, sz, bufsz;
 	int err, ret = -1;
@@ -286,6 +291,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
 	} else {
 		sample.read.one.value = 0x08789faeb786aa87ULL;
 		sample.read.one.id    = 99;
+		sample.read.one.lost  = 1;
 	}
 
 	sz = perf_event__sample_event_size(&sample, sample_type, read_format);
@@ -370,7 +376,7 @@ out_free:
  */
 static int test__sample_parsing(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
-	const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15};
+	const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 28, 29, 30, 31};
 	u64 sample_type;
 	u64 sample_regs;
 	size_t i;
diff --git a/tools/perf/tests/shell/coresight/Makefile b/tools/perf/tests/shell/coresight/Makefile
new file mode 100644
index 000000000000..b070e779703e
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+include ../../../../../tools/scripts/Makefile.include
+include ../../../../../tools/scripts/Makefile.arch
+include ../../../../../tools/scripts/utilities.mak
+
+SUBDIRS = \
+	asm_pure_loop \
+	memcpy_thread \
+	thread_loop \
+	unroll_loop_thread
+
+all: $(SUBDIRS)
+$(SUBDIRS):
+	@$(MAKE) -C $@ >/dev/null
+
+INSTALLDIRS = $(SUBDIRS:%=install-%)
+
+install-tests: $(INSTALLDIRS)
+$(INSTALLDIRS):
+	@$(MAKE) -C $(@:install-%=%) install-tests >/dev/null
+
+CLEANDIRS = $(SUBDIRS:%=clean-%)
+
+clean: $(CLEANDIRS)
+$(CLEANDIRS):
+	$(call QUIET_CLEAN, test-$(@:clean-%=%)) $(Q)$(MAKE) -C $(@:clean-%=%) clean >/dev/null
+
+.PHONY: all clean $(SUBDIRS) $(CLEANDIRS) $(INSTALLDIRS)
diff --git a/tools/perf/tests/shell/coresight/Makefile.miniconfig b/tools/perf/tests/shell/coresight/Makefile.miniconfig
new file mode 100644
index 000000000000..5f72a9cb43f3
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/Makefile.miniconfig
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+
+ifndef DESTDIR
+prefix ?= $(HOME)
+endif
+
+DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
+INSTALL = install
+INSTDIR_SUB = tests/shell/coresight
+
+include ../../../../../scripts/Makefile.include
+include ../../../../../scripts/Makefile.arch
+include ../../../../../scripts/utilities.mak
diff --git a/tools/perf/tests/shell/coresight/asm_pure_loop.sh b/tools/perf/tests/shell/coresight/asm_pure_loop.sh
new file mode 100755
index 000000000000..569e9d46162b
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/asm_pure_loop.sh
@@ -0,0 +1,18 @@
+#!/bin/sh -e
+# CoreSight / ASM Pure Loop
+
+# SPDX-License-Identifier: GPL-2.0
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+
+TEST="asm_pure_loop"
+. $(dirname $0)/../lib/coresight.sh
+ARGS=""
+DATV="out"
+DATA="$DATD/perf-$TEST-$DATV.data"
+
+perf record $PERFRECOPT -o "$DATA" "$BIN" $ARGS
+
+perf_dump_aux_verify "$DATA" 10 10 10
+
+err=$?
+exit $err
diff --git a/tools/perf/tests/shell/coresight/asm_pure_loop/.gitignore b/tools/perf/tests/shell/coresight/asm_pure_loop/.gitignore
new file mode 100644
index 000000000000..468673ac32e8
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/asm_pure_loop/.gitignore
@@ -0,0 +1 @@
+asm_pure_loop
diff --git a/tools/perf/tests/shell/coresight/asm_pure_loop/Makefile b/tools/perf/tests/shell/coresight/asm_pure_loop/Makefile
new file mode 100644
index 000000000000..206849e92bc9
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/asm_pure_loop/Makefile
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+
+include ../Makefile.miniconfig
+
+# Binary to produce
+BIN=asm_pure_loop
+# Any linking/libraries needed for the binary - empty if none needed
+LIB=
+
+all: $(BIN)
+
+$(BIN): $(BIN).S
+ifdef CORESIGHT
+ifeq ($(ARCH),arm64)
+# Build line - this is raw asm with no libc to have an always exact binary
+	$(Q)$(CC) $(BIN).S -nostdlib -static -o $(BIN) $(LIB)
+endif
+endif
+
+install-tests: all
+ifdef CORESIGHT
+ifeq ($(ARCH),arm64)
+# Install the test tool in the right place
+	$(call QUIET_INSTALL, tests) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$(INSTDIR_SUB)/$(BIN)'; \
+		$(INSTALL) $(BIN) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$(INSTDIR_SUB)/$(BIN)/$(BIN)'
+endif
+endif
+
+clean:
+	$(Q)$(RM) -f $(BIN)
+
+.PHONY: all clean install-tests
diff --git a/tools/perf/tests/shell/coresight/asm_pure_loop/asm_pure_loop.S b/tools/perf/tests/shell/coresight/asm_pure_loop/asm_pure_loop.S
new file mode 100644
index 000000000000..75cf084a927d
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/asm_pure_loop/asm_pure_loop.S
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Tamas Zsoldos <tamas.zsoldos@arm.com>, 2021 */
+
+.globl _start
+_start:
+	mov	x0, 0x0000ffff
+	mov	x1, xzr
+loop:
+	nop
+	nop
+	cbnz	x1, noskip
+	nop
+	nop
+	adrp	x2, skip
+	add 	x2, x2, :lo12:skip
+	br	x2
+	nop
+	nop
+noskip:
+	nop
+	nop
+skip:
+	sub	x0, x0, 1
+	cbnz	x0, loop
+
+	mov	x0, #0
+	mov	x8, #93 // __NR_exit syscall
+	svc	#0
diff --git a/tools/perf/tests/shell/coresight/memcpy_thread/.gitignore b/tools/perf/tests/shell/coresight/memcpy_thread/.gitignore
new file mode 100644
index 000000000000..f8217e56091e
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/memcpy_thread/.gitignore
@@ -0,0 +1 @@
+memcpy_thread
diff --git a/tools/perf/tests/shell/coresight/memcpy_thread/Makefile b/tools/perf/tests/shell/coresight/memcpy_thread/Makefile
new file mode 100644
index 000000000000..2db637eb2c26
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/memcpy_thread/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+include ../Makefile.miniconfig
+
+# Binary to produce
+BIN=memcpy_thread
+# Any linking/libraries needed for the binary - empty if none needed
+LIB=-pthread
+
+all: $(BIN)
+
+$(BIN): $(BIN).c
+ifdef CORESIGHT
+ifeq ($(ARCH),arm64)
+# Build line
+	$(Q)$(CC) $(BIN).c -o $(BIN) $(LIB)
+endif
+endif
+
+install-tests: all
+ifdef CORESIGHT
+ifeq ($(ARCH),arm64)
+# Install the test tool in the right place
+	$(call QUIET_INSTALL, tests) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$(INSTDIR_SUB)/$(BIN)'; \
+		$(INSTALL) $(BIN) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$(INSTDIR_SUB)/$(BIN)/$(BIN)'
+endif
+endif
+
+clean:
+	$(Q)$(RM) -f $(BIN)
+
+.PHONY: all clean install-tests
diff --git a/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c b/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c
new file mode 100644
index 000000000000..a7e169d1bf64
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+// Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+
+struct args {
+	unsigned long loops;
+	unsigned long size;
+	pthread_t th;
+	void *ret;
+};
+
+static void *thrfn(void *arg)
+{
+	struct args *a = arg;
+	unsigned long i, len = a->loops;
+	unsigned char *src, *dst;
+
+	src = malloc(a->size * 1024);
+	dst = malloc(a->size * 1024);
+	if ((!src) || (!dst)) {
+		printf("ERR: Can't allocate memory\n");
+		exit(1);
+	}
+	for (i = 0; i < len; i++)
+		memcpy(dst, src, a->size * 1024);
+}
+
+static pthread_t new_thr(void *(*fn) (void *arg), void *arg)
+{
+	pthread_t t;
+	pthread_attr_t attr;
+
+	pthread_attr_init(&attr);
+	pthread_create(&t, &attr, fn, arg);
+	return t;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned long i, len, size, thr;
+	pthread_t threads[256];
+	struct args args[256];
+	long long v;
+
+	if (argc < 4) {
+		printf("ERR: %s [copysize Kb] [numthreads] [numloops (hundreds)]\n", argv[0]);
+		exit(1);
+	}
+
+	v = atoll(argv[1]);
+	if ((v < 1) || (v > (1024 * 1024))) {
+		printf("ERR: max memory 1GB (1048576 KB)\n");
+		exit(1);
+	}
+	size = v;
+	thr = atol(argv[2]);
+	if ((thr < 1) || (thr > 256)) {
+		printf("ERR: threads 1-256\n");
+		exit(1);
+	}
+	v = atoll(argv[3]);
+	if ((v < 1) || (v > 40000000000ll)) {
+		printf("ERR: loops 1-40000000000 (hundreds)\n");
+		exit(1);
+	}
+	len = v * 100;
+	for (i = 0; i < thr; i++) {
+		args[i].loops = len;
+		args[i].size = size;
+		args[i].th = new_thr(thrfn, &(args[i]));
+	}
+	for (i = 0; i < thr; i++)
+		pthread_join(args[i].th, &(args[i].ret));
+	return 0;
+}
diff --git a/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh b/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh
new file mode 100755
index 000000000000..d21ba8545938
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh
@@ -0,0 +1,18 @@
+#!/bin/sh -e
+# CoreSight / Memcpy 16k 10 Threads
+
+# SPDX-License-Identifier: GPL-2.0
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+
+TEST="memcpy_thread"
+. $(dirname $0)/../lib/coresight.sh
+ARGS="16 10 1"
+DATV="16k_10"
+DATA="$DATD/perf-$TEST-$DATV.data"
+
+perf record $PERFRECOPT -o "$DATA" "$BIN" $ARGS
+
+perf_dump_aux_verify "$DATA" 10 10 10
+
+err=$?
+exit $err
diff --git a/tools/perf/tests/shell/coresight/thread_loop/.gitignore b/tools/perf/tests/shell/coresight/thread_loop/.gitignore
new file mode 100644
index 000000000000..6d4c33eaa9e8
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/thread_loop/.gitignore
@@ -0,0 +1 @@
+thread_loop
diff --git a/tools/perf/tests/shell/coresight/thread_loop/Makefile b/tools/perf/tests/shell/coresight/thread_loop/Makefile
new file mode 100644
index 000000000000..ea846c038e7a
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/thread_loop/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+include ../Makefile.miniconfig
+
+# Binary to produce
+BIN=thread_loop
+# Any linking/libraries needed for the binary - empty if none needed
+LIB=-pthread
+
+all: $(BIN)
+
+$(BIN): $(BIN).c
+ifdef CORESIGHT
+ifeq ($(ARCH),arm64)
+# Build line
+	$(Q)$(CC) $(BIN).c -o $(BIN) $(LIB)
+endif
+endif
+
+install-tests: all
+ifdef CORESIGHT
+ifeq ($(ARCH),arm64)
+# Install the test tool in the right place
+	$(call QUIET_INSTALL, tests) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$(INSTDIR_SUB)/$(BIN)'; \
+		$(INSTALL) $(BIN) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$(INSTDIR_SUB)/$(BIN)/$(BIN)'
+endif
+endif
+
+clean:
+	$(Q)$(RM) -f $(BIN)
+
+.PHONY: all clean install-tests
diff --git a/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c b/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c
new file mode 100644
index 000000000000..c0158fac7d0b
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/thread_loop/thread_loop.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+// Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+
+// define this for gettid()
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+#include <sys/syscall.h>
+#ifndef SYS_gettid
+// gettid is 178 on arm64
+# define SYS_gettid 178
+#endif
+#define gettid() syscall(SYS_gettid)
+
+struct args {
+	unsigned int loops;
+	pthread_t th;
+	void *ret;
+};
+
+static void *thrfn(void *arg)
+{
+	struct args *a = arg;
+	int i = 0, len = a->loops;
+
+	if (getenv("SHOW_TID")) {
+		unsigned long long tid = gettid();
+
+		printf("%llu\n", tid);
+	}
+	asm volatile(
+		"loop:\n"
+		"add %[i], %[i], #1\n"
+		"cmp %[i], %[len]\n"
+		"blt loop\n"
+		: /* out */
+		: /* in */ [i] "r" (i), [len] "r" (len)
+		: /* clobber */
+	);
+	return (void *)(long)i;
+}
+
+static pthread_t new_thr(void *(*fn) (void *arg), void *arg)
+{
+	pthread_t t;
+	pthread_attr_t attr;
+
+	pthread_attr_init(&attr);
+	pthread_create(&t, &attr, fn, arg);
+	return t;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned int i, len, thr;
+	pthread_t threads[256];
+	struct args args[256];
+
+	if (argc < 3) {
+		printf("ERR: %s [numthreads] [numloops (millions)]\n", argv[0]);
+		exit(1);
+	}
+
+	thr = atoi(argv[1]);
+	if ((thr < 1) || (thr > 256)) {
+		printf("ERR: threads 1-256\n");
+		exit(1);
+	}
+	len = atoi(argv[2]);
+	if ((len < 1) || (len > 4000)) {
+		printf("ERR: max loops 4000 (millions)\n");
+		exit(1);
+	}
+	len *= 1000000;
+	for (i = 0; i < thr; i++) {
+		args[i].loops = len;
+		args[i].th = new_thr(thrfn, &(args[i]));
+	}
+	for (i = 0; i < thr; i++)
+		pthread_join(args[i].th, &(args[i].ret));
+	return 0;
+}
diff --git a/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh b/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh
new file mode 100755
index 000000000000..7c13636fc778
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh
@@ -0,0 +1,19 @@
+#!/bin/sh -e
+# CoreSight / Thread Loop 10 Threads - Check TID
+
+# SPDX-License-Identifier: GPL-2.0
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+
+TEST="thread_loop"
+. $(dirname $0)/../lib/coresight.sh
+ARGS="10 1"
+DATV="check-tid-10th"
+DATA="$DATD/perf-$TEST-$DATV.data"
+STDO="$DATD/perf-$TEST-$DATV.stdout"
+
+SHOW_TID=1 perf record -s $PERFRECOPT -o "$DATA" "$BIN" $ARGS > $STDO
+
+perf_dump_aux_tid_verify "$DATA" "$STDO"
+
+err=$?
+exit $err
diff --git a/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh b/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh
new file mode 100755
index 000000000000..a067145af43c
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh
@@ -0,0 +1,19 @@
+#!/bin/sh -e
+# CoreSight / Thread Loop 2 Threads - Check TID
+
+# SPDX-License-Identifier: GPL-2.0
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+
+TEST="thread_loop"
+. $(dirname $0)/../lib/coresight.sh
+ARGS="2 20"
+DATV="check-tid-2th"
+DATA="$DATD/perf-$TEST-$DATV.data"
+STDO="$DATD/perf-$TEST-$DATV.stdout"
+
+SHOW_TID=1 perf record -s $PERFRECOPT -o "$DATA" "$BIN" $ARGS > $STDO
+
+perf_dump_aux_tid_verify "$DATA" "$STDO"
+
+err=$?
+exit $err
diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread/.gitignore b/tools/perf/tests/shell/coresight/unroll_loop_thread/.gitignore
new file mode 100644
index 000000000000..2cb4e996dbf3
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/unroll_loop_thread/.gitignore
@@ -0,0 +1 @@
+unroll_loop_thread
diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread/Makefile b/tools/perf/tests/shell/coresight/unroll_loop_thread/Makefile
new file mode 100644
index 000000000000..6264c4e3abd1
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/unroll_loop_thread/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+include ../Makefile.miniconfig
+
+# Binary to produce
+BIN=unroll_loop_thread
+# Any linking/libraries needed for the binary - empty if none needed
+LIB=-pthread
+
+all: $(BIN)
+
+$(BIN): $(BIN).c
+ifdef CORESIGHT
+ifeq ($(ARCH),arm64)
+# Build line
+	$(Q)$(CC) $(BIN).c -o $(BIN) $(LIB)
+endif
+endif
+
+install-tests: all
+ifdef CORESIGHT
+ifeq ($(ARCH),arm64)
+# Install the test tool in the right place
+	$(call QUIET_INSTALL, tests) \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$(INSTDIR_SUB)/$(BIN)'; \
+		$(INSTALL) $(BIN) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$(INSTDIR_SUB)/$(BIN)/$(BIN)'
+endif
+endif
+
+clean:
+	$(Q)$(RM) -f $(BIN)
+
+.PHONY: all clean install-tests
diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c b/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c
new file mode 100644
index 000000000000..8f6d384208ed
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+// Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+
+struct args {
+	pthread_t th;
+	unsigned int in;
+	void *ret;
+};
+
+static void *thrfn(void *arg)
+{
+	struct args *a = arg;
+	unsigned int i, in = a->in;
+
+	for (i = 0; i < 10000; i++) {
+		asm volatile (
+// force an unroll of thia add instruction so we can test long runs of code
+#define SNIP1 "add %[in], %[in], #1\n"
+// 10
+#define SNIP2 SNIP1 SNIP1 SNIP1 SNIP1 SNIP1 SNIP1 SNIP1 SNIP1 SNIP1 SNIP1
+// 100
+#define SNIP3 SNIP2 SNIP2 SNIP2 SNIP2 SNIP2 SNIP2 SNIP2 SNIP2 SNIP2 SNIP2
+// 1000
+#define SNIP4 SNIP3 SNIP3 SNIP3 SNIP3 SNIP3 SNIP3 SNIP3 SNIP3 SNIP3 SNIP3
+// 10000
+#define SNIP5 SNIP4 SNIP4 SNIP4 SNIP4 SNIP4 SNIP4 SNIP4 SNIP4 SNIP4 SNIP4
+// 100000
+			SNIP5 SNIP5 SNIP5 SNIP5 SNIP5 SNIP5 SNIP5 SNIP5 SNIP5 SNIP5
+			: /* out */
+			: /* in */ [in] "r" (in)
+			: /* clobber */
+		);
+	}
+}
+
+static pthread_t new_thr(void *(*fn) (void *arg), void *arg)
+{
+	pthread_t t;
+	pthread_attr_t attr;
+
+	pthread_attr_init(&attr);
+	pthread_create(&t, &attr, fn, arg);
+	return t;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned int i, thr;
+	pthread_t threads[256];
+	struct args args[256];
+
+	if (argc < 2) {
+		printf("ERR: %s [numthreads]\n", argv[0]);
+		exit(1);
+	}
+
+	thr = atoi(argv[1]);
+	if ((thr > 256) || (thr < 1)) {
+		printf("ERR: threads 1-256\n");
+		exit(1);
+	}
+	for (i = 0; i < thr; i++) {
+		args[i].in = rand();
+		args[i].th = new_thr(thrfn, &(args[i]));
+	}
+	for (i = 0; i < thr; i++)
+		pthread_join(args[i].th, &(args[i].ret));
+	return 0;
+}
diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh b/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh
new file mode 100755
index 000000000000..f48c85230b15
--- /dev/null
+++ b/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh
@@ -0,0 +1,18 @@
+#!/bin/sh -e
+# CoreSight / Unroll Loop Thread 10
+
+# SPDX-License-Identifier: GPL-2.0
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+
+TEST="unroll_loop_thread"
+. $(dirname $0)/../lib/coresight.sh
+ARGS="10"
+DATV="10"
+DATA="$DATD/perf-$TEST-$DATV.data"
+
+perf record $PERFRECOPT -o "$DATA" "$BIN" $ARGS
+
+perf_dump_aux_verify "$DATA" 10 10 10
+
+err=$?
+exit $err
diff --git a/tools/perf/tests/shell/lib/coresight.sh b/tools/perf/tests/shell/lib/coresight.sh
new file mode 100644
index 000000000000..45a1477256b6
--- /dev/null
+++ b/tools/perf/tests/shell/lib/coresight.sh
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: GPL-2.0
+# Carsten Haitzler <carsten.haitzler@arm.com>, 2021
+
+# This is sourced from a driver script so no need for #!/bin... etc. at the
+# top - the assumption below is that it runs as part of sourcing after the
+# test sets up some basic env vars to say what it is.
+
+# This currently works with ETMv4 / ETF not any other packet types at thi
+# point. This will need changes if that changes.
+
+# perf record options for the perf tests to use
+PERFRECMEM="-m ,16M"
+PERFRECOPT="$PERFRECMEM -e cs_etm//u"
+
+TOOLS=$(dirname $0)
+DIR="$TOOLS/$TEST"
+BIN="$DIR/$TEST"
+# If the test tool/binary does not exist and is executable then skip the test
+if ! test -x "$BIN"; then exit 2; fi
+DATD="."
+# If the data dir env is set then make the data dir use that instead of ./
+if test -n "$PERF_TEST_CORESIGHT_DATADIR"; then
+	DATD="$PERF_TEST_CORESIGHT_DATADIR";
+fi
+# If the stat dir env is set then make the data dir use that instead of ./
+STATD="."
+if test -n "$PERF_TEST_CORESIGHT_STATDIR"; then
+	STATD="$PERF_TEST_CORESIGHT_STATDIR";
+fi
+
+# Called if the test fails - error code 1
+err() {
+	echo "$1"
+	exit 1
+}
+
+# Check that some statistics from our perf
+check_val_min() {
+	STATF="$4"
+	if test "$2" -lt "$3"; then
+		echo ", FAILED" >> "$STATF"
+		err "Sanity check number of $1 is too low ($2 < $3)"
+	fi
+}
+
+perf_dump_aux_verify() {
+	# Some basic checking that the AUX chunk contains some sensible data
+	# to see that we are recording something and at least a minimum
+	# amount of it. We should almost always see Fn packets in just about
+	# anything but certainly we will see some trace info and async
+	# packets
+	DUMP="$DATD/perf-tmp-aux-dump.txt"
+	perf report --stdio --dump -i "$1" | \
+		grep -o -e I_ATOM_F -e I_ASYNC -e I_TRACE_INFO > "$DUMP"
+	# Simply count how many of these packets we find to see that we are
+	# producing a reasonable amount of data - exact checks are not sane
+	# as this is a lossy process where we may lose some blocks and the
+	# compiler may produce different code depending on the compiler and
+	# optimization options, so this is rough just to see if we're
+	# either missing almost all the data or all of it
+	ATOM_FX_NUM=`grep I_ATOM_F "$DUMP" | wc -l`
+	ASYNC_NUM=`grep I_ASYNC "$DUMP" | wc -l`
+	TRACE_INFO_NUM=`grep I_TRACE_INFO "$DUMP" | wc -l`
+	rm -f "$DUMP"
+
+	# Arguments provide minimums for a pass
+	CHECK_FX_MIN="$2"
+	CHECK_ASYNC_MIN="$3"
+	CHECK_TRACE_INFO_MIN="$4"
+
+	# Write out statistics, so over time you can track results to see if
+	# there is a pattern - for example we have less "noisy" results that
+	# produce more consistent amounts of data each run, to see if over
+	# time any techinques to  minimize data loss are having an effect or
+	# not
+	STATF="$STATD/stats-$TEST-$DATV.csv"
+	if ! test -f "$STATF"; then
+		echo "ATOM Fx Count, Minimum, ASYNC Count, Minimum, TRACE INFO Count, Minimum" > "$STATF"
+	fi
+	echo -n "$ATOM_FX_NUM, $CHECK_FX_MIN, $ASYNC_NUM, $CHECK_ASYNC_MIN, $TRACE_INFO_NUM, $CHECK_TRACE_INFO_MIN" >> "$STATF"
+
+	# Actually check to see if we passed or failed.
+	check_val_min "ATOM_FX" "$ATOM_FX_NUM" "$CHECK_FX_MIN" "$STATF"
+	check_val_min "ASYNC" "$ASYNC_NUM" "$CHECK_ASYNC_MIN" "$STATF"
+	check_val_min "TRACE_INFO" "$TRACE_INFO_NUM" "$CHECK_TRACE_INFO_MIN" "$STATF"
+	echo ", Ok" >> "$STATF"
+}
+
+perf_dump_aux_tid_verify() {
+	# Specifically crafted test will produce a list of Tread ID's to
+	# stdout that need to be checked to  see that they have had trace
+	# info collected in AUX blocks in the perf data. This will go
+	# through all the TID's that are listed as CID=0xabcdef and see
+	# that all the Thread IDs the test tool reports are  in the perf
+	# data AUX chunks
+
+	# The TID test tools will print a TID per stdout line that are being
+	# tested
+	TIDS=`cat "$2"`
+	# Scan the perf report to find the TIDs that are actually CID in hex
+	# and build a list of the ones found
+	FOUND_TIDS=`perf report --stdio --dump -i "$1" | \
+			grep -o "CID=0x[0-9a-z]\+" | sed 's/CID=//g' | \
+			uniq | sort | uniq`
+	# No CID=xxx found - maybe your kernel is reporting these as
+	# VMID=xxx so look there
+	if test -z "$FOUND_TIDS"; then
+		FOUND_TIDS=`perf report --stdio --dump -i "$1" | \
+				grep -o "VMID=0x[0-9a-z]\+" | sed 's/VMID=//g' | \
+				uniq | sort | uniq`
+	fi
+
+	# Iterate over the list of TIDs that the test says it has and find
+	# them in the TIDs found in the perf report
+	MISSING=""
+	for TID2 in $TIDS; do
+		FOUND=""
+		for TIDHEX in $FOUND_TIDS; do
+			TID=`printf "%i" $TIDHEX`
+			if test "$TID" -eq "$TID2"; then
+				FOUND="y"
+				break
+			fi
+		done
+		if test -z "$FOUND"; then
+			MISSING="$MISSING $TID"
+		fi
+	done
+	if test -n "$MISSING"; then
+		err "Thread IDs $MISSING not found in perf AUX data"
+	fi
+}
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
index 5b17d916c555..b616d42bd19d 100644
--- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -19,6 +19,6 @@ add_probe_vfs_getname() {
 }
 
 skip_if_no_debuginfo() {
-	add_probe_vfs_getname -v 2>&1 | egrep -q "^(Failed to find the path for the kernel|Debuginfo-analysis is not supported)" && return 2
+	add_probe_vfs_getname -v 2>&1 | egrep -q "^(Failed to find the path for the kernel|Debuginfo-analysis is not supported)|(file has no debug information)" && return 2
 	return 1
 }
diff --git a/tools/perf/tests/shell/lib/waiting.sh b/tools/perf/tests/shell/lib/waiting.sh
new file mode 100644
index 000000000000..e7a39134a68e
--- /dev/null
+++ b/tools/perf/tests/shell/lib/waiting.sh
@@ -0,0 +1,77 @@
+# SPDX-License-Identifier: GPL-2.0
+
+tenths=date\ +%s%1N
+
+# Wait for PID $1 to have $2 number of threads started
+# Time out after $3 tenths of a second or 5 seconds if $3 is ""
+wait_for_threads()
+{
+	tm_out=$3 ; [ -n "${tm_out}" ] || tm_out=50
+	start_time=$($tenths)
+	while [ -e "/proc/$1/task" ] ; do
+		th_cnt=$(find "/proc/$1/task" -mindepth 1 -maxdepth 1 -printf x | wc -c)
+		if [ "${th_cnt}" -ge "$2" ] ; then
+			return 0
+		fi
+		# Wait at most tm_out tenths of a second
+		if [ $(($($tenths) - start_time)) -ge $tm_out ] ; then
+			echo "PID $1 does not have $2 threads"
+			return 1
+		fi
+	done
+	return 1
+}
+
+# Wait for perf record -vvv 2>$2 with PID $1 to start by looking at file $2
+# It depends on capturing perf record debug message "perf record has started"
+# Time out after $3 tenths of a second or 5 seconds if $3 is ""
+wait_for_perf_to_start()
+{
+	tm_out=$3 ; [ -n "${tm_out}" ] || tm_out=50
+	echo "Waiting for \"perf record has started\" message"
+	start_time=$($tenths)
+	while [ -e "/proc/$1" ] ; do
+		if grep -q "perf record has started" "$2" ; then
+			echo OK
+			break
+		fi
+		# Wait at most tm_out tenths of a second
+		if [ $(($($tenths) - start_time)) -ge $tm_out ] ; then
+			echo "perf recording did not start"
+			return 1
+		fi
+	done
+	return 0
+}
+
+# Wait for process PID %1 to exit
+# Time out after $2 tenths of a second or 5 seconds if $2 is ""
+wait_for_process_to_exit()
+{
+	tm_out=$2 ; [ -n "${tm_out}" ] || tm_out=50
+	start_time=$($tenths)
+	while [ -e "/proc/$1" ] ; do
+		# Wait at most tm_out tenths of a second
+		if [ $(($($tenths) - start_time)) -ge $tm_out ] ; then
+			echo "PID $1 did not exit as expected"
+			return 1
+		fi
+	done
+	return 0
+}
+
+# Check if PID $1 is still running after $2 tenths of a second
+# or 0.3 seconds if $2 is ""
+is_running()
+{
+	tm_out=$2 ; [ -n "${tm_out}" ] || tm_out=3
+	start_time=$($tenths)
+	while [ -e "/proc/$1" ] ; do
+		# Check for at least tm_out tenths of a second
+		if [ $(($($tenths) - start_time)) -gt $tm_out ] ; then
+			return 0
+		fi
+	done
+	echo "PID $1 exited prematurely"
+	return 1
+}
diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh
new file mode 100755
index 000000000000..04bf604e3c6f
--- /dev/null
+++ b/tools/perf/tests/shell/lock_contention.sh
@@ -0,0 +1,73 @@
+#!/bin/sh
+# kernel lock contention analysis test
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+err=0
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+result=$(mktemp /tmp/__perf_test.result.XXXXX)
+
+cleanup() {
+	rm -f ${perfdata}
+	rm -f ${result}
+	trap - exit term int
+}
+
+trap_cleanup() {
+	cleanup
+	exit ${err}
+}
+trap trap_cleanup exit term int
+
+check() {
+	if [ `id -u` != 0 ]; then
+		echo "[Skip] No root permission"
+		err=2
+		exit
+	fi
+
+	if ! perf list | grep -q lock:contention_begin; then
+		echo "[Skip] No lock contention tracepoints"
+		err=2
+		exit
+	fi
+}
+
+test_record()
+{
+	echo "Testing perf lock record and perf lock contention"
+	perf lock record -o ${perfdata} -- perf bench sched messaging > /dev/null 2>&1
+	# the output goes to the stderr and we expect only 1 output (-E 1)
+	perf lock contention -i ${perfdata} -E 1 -q 2> ${result}
+	if [ $(cat "${result}" | wc -l) != "1" ]; then
+		echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l)
+		err=1
+		exit
+	fi
+}
+
+test_bpf()
+{
+	echo "Testing perf lock contention --use-bpf"
+
+	if ! perf lock con -b true > /dev/null 2>&1 ; then
+		echo "[Skip] No BPF support"
+		exit
+	fi
+
+	# the perf lock contention output goes to the stderr
+	perf lock con -a -b -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result}
+	if [ $(cat "${result}" | wc -l) != "1" ]; then
+		echo "[Fail] BPF result count is not 1:" $(cat "${result}" | wc -l)
+		err=1
+		exit
+	fi
+}
+
+check
+
+test_record
+test_bpf
+
+exit ${err}
diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 00c7285ce1ac..301f95427159 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -61,7 +61,7 @@ test_register_capture() {
     echo "Register capture test [Skipped missing registers]"
     return
   fi
-  if ! perf record -o - --intr-regs=di,r8,dx,cx -e cpu/br_inst_retired.near_call/p \
+  if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \
     -c 1000 --per-thread true 2> /dev/null \
     | perf script -F ip,sym,iregs -i - 2> /dev/null \
     | egrep -q "DI:"
diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh
index eb5196f58190..b7f050aa6210 100755
--- a/tools/perf/tests/shell/stat+csv_output.sh
+++ b/tools/perf/tests/shell/stat+csv_output.sh
@@ -6,6 +6,8 @@
 
 set -e
 
+skip_test=0
+
 function commachecker()
 {
 	local -i cnt=0
@@ -156,14 +158,47 @@ check_per_socket()
 	echo "[Success]"
 }
 
+# The perf stat options for per-socket, per-core, per-die
+# and -A ( no_aggr mode ) uses the info fetched from this
+# directory: "/sys/devices/system/cpu/cpu*/topology". For
+# example, socket value is fetched from "physical_package_id"
+# file in topology directory.
+# Reference: cpu__get_topology_int in util/cpumap.c
+# If the platform doesn't expose topology information, values
+# will be set to -1. For example, incase of pSeries platform
+# of powerpc, value for  "physical_package_id" is restricted
+# and set to -1. Check here validates the socket-id read from
+# topology file before proceeding further
+
+FILE_LOC="/sys/devices/system/cpu/cpu*/topology/"
+FILE_NAME="physical_package_id"
+
+check_for_topology()
+{
+	if ! ParanoidAndNotRoot 0
+	then
+		socket_file=`ls $FILE_LOC/$FILE_NAME | head -n 1`
+		[ -z $socket_file ] && return 0
+		socket_id=`cat $socket_file`
+		[ $socket_id == -1 ] && skip_test=1
+		return 0
+	fi
+}
+
+check_for_topology
 check_no_args
 check_system_wide
-check_system_wide_no_aggr
 check_interval
 check_event
-check_per_core
 check_per_thread
-check_per_die
 check_per_node
-check_per_socket
+if [ $skip_test -ne 1 ]
+then
+	check_system_wide_no_aggr
+	check_per_core
+	check_per_die
+	check_per_socket
+else
+	echo "[Skip] Skipping tests for system_wide_no_aggr, per_core, per_die and per_socket since socket id exposed via topology is invalid"
+fi
 exit 0
diff --git a/tools/perf/tests/shell/stat+json_output.sh b/tools/perf/tests/shell/stat+json_output.sh
index ea8714a36051..2c4212c641ed 100755
--- a/tools/perf/tests/shell/stat+json_output.sh
+++ b/tools/perf/tests/shell/stat+json_output.sh
@@ -6,6 +6,8 @@
 
 set -e
 
+skip_test=0
+
 pythonchecker=$(dirname $0)/lib/perf_json_output_lint.py
 if [ "x$PYTHON" == "x" ]
 then
@@ -134,14 +136,47 @@ check_per_socket()
 	echo "[Success]"
 }
 
+# The perf stat options for per-socket, per-core, per-die
+# and -A ( no_aggr mode ) uses the info fetched from this
+# directory: "/sys/devices/system/cpu/cpu*/topology". For
+# example, socket value is fetched from "physical_package_id"
+# file in topology directory.
+# Reference: cpu__get_topology_int in util/cpumap.c
+# If the platform doesn't expose topology information, values
+# will be set to -1. For example, incase of pSeries platform
+# of powerpc, value for  "physical_package_id" is restricted
+# and set to -1. Check here validates the socket-id read from
+# topology file before proceeding further
+
+FILE_LOC="/sys/devices/system/cpu/cpu*/topology/"
+FILE_NAME="physical_package_id"
+
+check_for_topology()
+{
+	if ! ParanoidAndNotRoot 0
+	then
+		socket_file=`ls $FILE_LOC/$FILE_NAME | head -n 1`
+		[ -z $socket_file ] && return 0
+		socket_id=`cat $socket_file`
+		[ $socket_id == -1 ] && skip_test=1
+		return 0
+	fi
+}
+
+check_for_topology
 check_no_args
 check_system_wide
-check_system_wide_no_aggr
 check_interval
 check_event
-check_per_core
 check_per_thread
-check_per_die
 check_per_node
-check_per_socket
+if [ $skip_test -ne 1 ]
+then
+	check_system_wide_no_aggr
+	check_per_core
+	check_per_die
+	check_per_socket
+else
+	echo "[Skip] Skipping tests for system_wide_no_aggr, per_core, per_die and per_socket since socket id exposed via topology is invalid"
+fi
 exit 0
diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh
index 9313ef2739e0..26a51b48aee4 100755
--- a/tools/perf/tests/shell/stat.sh
+++ b/tools/perf/tests/shell/stat.sh
@@ -28,6 +28,24 @@ test_stat_record_report() {
   echo "stat record and report test [Success]"
 }
 
+test_stat_repeat_weak_groups() {
+  echo "stat repeat weak groups test"
+  if ! perf stat -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}' \
+     true 2>&1 | grep -q 'seconds time elapsed'
+  then
+    echo "stat repeat weak groups test [Skipped event parsing failed]"
+    return
+  fi
+  if ! perf stat -r2 -e '{cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles,cycles}:W' \
+    true > /dev/null 2>&1
+  then
+    echo "stat repeat weak groups test [Failed]"
+    err=1
+    return
+  fi
+  echo "stat repeat weak groups test [Success]"
+}
+
 test_topdown_groups() {
   # Topdown events must be grouped with the slots event first. Test that
   # parse-events reorders this.
@@ -75,6 +93,7 @@ test_topdown_weak_groups() {
 
 test_default_stat
 test_stat_record_report
+test_stat_repeat_weak_groups
 test_topdown_groups
 test_topdown_weak_groups
 exit $err
diff --git a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
new file mode 100755
index 000000000000..d724855d097c
--- /dev/null
+++ b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
@@ -0,0 +1,83 @@
+#!/bin/sh
+# perf stat --bpf-counters --for-each-cgroup test
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+test_cgroups=
+if [ "$1" = "-v" ]; then
+	verbose="1"
+fi
+
+# skip if --bpf-counters --for-each-cgroup is not supported
+check_bpf_counter()
+{
+	if ! perf stat -a --bpf-counters --for-each-cgroup / true > /dev/null 2>&1; then
+		if [ "${verbose}" = "1" ]; then
+			echo "Skipping: --bpf-counters --for-each-cgroup not supported"
+			perf --no-pager stat -a --bpf-counters --for-each-cgroup / true || true
+		fi
+		exit 2
+	fi
+}
+
+# find two cgroups to measure
+find_cgroups()
+{
+	# try usual systemd slices first
+	if [ -d /sys/fs/cgroup/system.slice -a -d /sys/fs/cgroup/user.slice ]; then
+		test_cgroups="system.slice,user.slice"
+		return
+	fi
+
+	# try root and self cgroups
+	local self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3)
+	if [ -z ${self_cgrp} ]; then
+		# cgroup v2 doesn't specify perf_event
+		self_cgrp=$(grep ^0: /proc/self/cgroup | cut -d: -f3)
+	fi
+
+	if [ -z ${self_cgrp} ]; then
+		test_cgroups="/"
+	else
+		test_cgroups="/,${self_cgrp}"
+	fi
+}
+
+# As cgroup events are cpu-wide, we cannot simply compare the result.
+# Just check if it runs without failure and has non-zero results.
+check_system_wide_counted()
+{
+	local output
+
+	output=$(perf stat -a --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, sleep 1  2>&1)
+	if echo ${output} | grep -q -F "<not "; then
+		echo "Some system-wide events are not counted"
+		if [ "${verbose}" = "1" ]; then
+			echo ${output}
+		fi
+		exit 1
+	fi
+}
+
+check_cpu_list_counted()
+{
+	local output
+
+	output=$(perf stat -C 1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1  2>&1)
+	if echo ${output} | grep -q -F "<not "; then
+		echo "Some CPU events are not counted"
+		if [ "${verbose}" = "1" ]; then
+			echo ${output}
+		fi
+		exit 1
+	fi
+}
+
+check_bpf_counter
+find_cgroups
+
+check_system_wide_counted
+check_cpu_list_counted
+
+exit 0
diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh
index e4cb4f1806ff..daad786cf48d 100755
--- a/tools/perf/tests/shell/test_arm_coresight.sh
+++ b/tools/perf/tests/shell/test_arm_coresight.sh
@@ -70,7 +70,7 @@ perf_report_instruction_samples() {
 	#   68.12%  touch    libc-2.27.so   [.] _dl_addr
 	#    5.80%  touch    libc-2.27.so   [.] getenv
 	#    4.35%  touch    ld-2.27.so     [.] _dl_fixup
-	perf report --itrace=i1000i --stdio -i ${perfdata} 2>&1 | \
+	perf report --itrace=i20i --stdio -i ${perfdata} 2>&1 | \
 		egrep " +[0-9]+\.[0-9]+% +$1" > /dev/null 2>&1
 }
 
diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh
index c644f94a6500..d7ff5c4b4da4 100755
--- a/tools/perf/tests/shell/test_brstack.sh
+++ b/tools/perf/tests/shell/test_brstack.sh
@@ -12,7 +12,11 @@ if ! [ -x "$(command -v cc)" ]; then
 fi
 
 # skip the test if the hardware doesn't support branch stack sampling
-perf record -b -o- -B true > /dev/null 2>&1 || exit 2
+# and if the architecture doesn't support filter types: any,save_type,u
+if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev/null 2>&1 ; then
+	echo "skip: system doesn't support filter types: any,save_type,u"
+	exit 2
+fi
 
 TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX)
 
diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh
new file mode 100755
index 000000000000..cd6eb54d235d
--- /dev/null
+++ b/tools/perf/tests/shell/test_data_symbol.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+# Test data symbol
+
+# SPDX-License-Identifier: GPL-2.0
+# Leo Yan <leo.yan@linaro.org>, 2022
+
+skip_if_no_mem_event() {
+	perf mem record -e list 2>&1 | egrep -q 'available' && return 0
+	return 2
+}
+
+skip_if_no_mem_event || exit 2
+
+# skip if there's no compiler
+if ! [ -x "$(command -v cc)" ]; then
+	echo "skip: no compiler, install gcc"
+	exit 2
+fi
+
+TEST_PROGRAM=$(mktemp /tmp/__perf_test.program.XXXXX)
+PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+
+check_result() {
+	# The memory report format is as below:
+	#    99.92%  ...  [.] buf1+0x38
+	result=$(perf mem report -i ${PERF_DATA} -s symbol_daddr -q 2>&1 |
+		 awk '/buf1/ { print $4 }')
+
+	# Testing is failed if has no any sample for "buf1"
+	[ -z "$result" ] && return 1
+
+	while IFS= read -r line; do
+		# The "data1" and "data2" fields in structure "buf1" have
+		# offset "0x0" and "0x38", returns failure if detect any
+		# other offset value.
+		if [ "$line" != "buf1+0x0" ] && [ "$line" != "buf1+0x38" ]; then
+			return 1
+		fi
+	done <<< "$result"
+
+	return 0
+}
+
+cleanup_files()
+{
+	echo "Cleaning up files..."
+	rm -f ${PERF_DATA}
+	rm -f ${TEST_PROGRAM}
+}
+
+trap cleanup_files exit term int
+
+# compile test program
+echo "Compiling test program..."
+cat << EOF | cc -o ${TEST_PROGRAM} -x c -
+typedef struct _buf {
+	char data1;
+	char reserved[55];
+	char data2;
+} buf __attribute__((aligned(64)));
+
+static buf buf1;
+
+int main(void) {
+	for (;;) {
+		buf1.data1++;
+		buf1.data2 += buf1.data1;
+	}
+	return 0;
+}
+EOF
+
+echo "Recording workload..."
+
+# perf mem/c2c internally uses IBS PMU on AMD CPU which doesn't support
+# user/kernel filtering and per-process monitoring, spin program on
+# specific CPU and test in per-CPU mode.
+is_amd=$(egrep -c 'vendor_id.*AuthenticAMD' /proc/cpuinfo)
+if (($is_amd >= 1)); then
+	perf mem record -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM &
+else
+	perf mem record --all-user -o ${PERF_DATA} -- $TEST_PROGRAM &
+fi
+
+PERFPID=$!
+
+sleep 1
+
+kill $PERFPID
+wait $PERFPID
+
+check_result
+exit $?
diff --git a/tools/perf/tests/shell/test_intel_pt.sh b/tools/perf/tests/shell/test_intel_pt.sh
index a3298643884d..f5ed7b1af419 100755
--- a/tools/perf/tests/shell/test_intel_pt.sh
+++ b/tools/perf/tests/shell/test_intel_pt.sh
@@ -7,32 +7,116 @@ set -e
 # Skip if no Intel PT
 perf list | grep -q 'intel_pt//' || exit 2
 
+shelldir=$(dirname "$0")
+. "${shelldir}"/lib/waiting.sh
+
 skip_cnt=0
 ok_cnt=0
 err_cnt=0
 
-tmpfile=`mktemp`
-perfdatafile=`mktemp`
+temp_dir=$(mktemp -d /tmp/perf-test-intel-pt-sh.XXXXXXXXXX)
+
+tmpfile="${temp_dir}/tmp-perf.data"
+perfdatafile="${temp_dir}/test-perf.data"
+outfile="${temp_dir}/test-out.txt"
+errfile="${temp_dir}/test-err.txt"
+workload="${temp_dir}/workload"
+awkscript="${temp_dir}/awkscript"
+jitdump_workload="${temp_dir}/jitdump_workload"
+maxbrstack="${temp_dir}/maxbrstack.py"
+
+cleanup()
+{
+	trap - EXIT TERM INT
+	sane=$(echo "${temp_dir}" | cut -b 1-26)
+	if [ "${sane}" = "/tmp/perf-test-intel-pt-sh" ] ; then
+		echo "--- Cleaning up ---"
+		rm -f "${temp_dir}/"*
+		rmdir "${temp_dir}"
+	fi
+}
+
+trap_cleanup()
+{
+	cleanup
+	exit 1
+}
+
+trap trap_cleanup EXIT TERM INT
+
+# perf record for testing without decoding
+perf_record_no_decode()
+{
+	# Options to speed up recording: no post-processing, no build-id cache update,
+	# and no BPF events.
+	perf record -B -N --no-bpf-event "$@"
+}
+
+# perf record for testing should not need BPF events
+perf_record_no_bpf()
+{
+	# Options for no BPF events
+	perf record --no-bpf-event "$@"
+}
+
+have_workload=false
+cat << _end_of_file_ | /usr/bin/cc -o "${workload}" -xc - -pthread && have_workload=true
+#include <time.h>
+#include <pthread.h>
+
+void work(void) {
+	struct timespec tm = {
+		.tv_nsec = 1000000,
+	};
+	int i;
+
+	/* Run for about 30 seconds */
+	for (i = 0; i < 30000; i++)
+		nanosleep(&tm, NULL);
+}
+
+void *threadfunc(void *arg) {
+	work();
+	return NULL;
+}
+
+int main(void) {
+	pthread_t th;
+
+	pthread_create(&th, NULL, threadfunc, NULL);
+	work();
+	pthread_join(th, NULL);
+	return 0;
+}
+_end_of_file_
 
 can_cpu_wide()
 {
-	perf record -o ${tmpfile} -B -N --no-bpf-event -e dummy:u -C $1 true 2>&1 >/dev/null || return 2
+	echo "Checking for CPU-wide recording on CPU $1"
+	if ! perf_record_no_decode -o "${tmpfile}" -e dummy:u -C "$1" true >/dev/null 2>&1 ; then
+		echo "No so skipping"
+		return 2
+	fi
+	echo OK
 	return 0
 }
 
 test_system_wide_side_band()
 {
+	echo "--- Test system-wide sideband ---"
+
 	# Need CPU 0 and CPU 1
 	can_cpu_wide 0 || return $?
 	can_cpu_wide 1 || return $?
 
 	# Record on CPU 0 a task running on CPU 1
-	perf record -B -N --no-bpf-event -o ${perfdatafile} -e intel_pt//u -C 0 -- taskset --cpu-list 1 uname
+	perf_record_no_decode -o "${perfdatafile}" -e intel_pt//u -C 0 -- taskset --cpu-list 1 uname
 
 	# Should get MMAP events from CPU 1 because they can be needed to decode
-	mmap_cnt=`perf script -i ${perfdatafile} --no-itrace --show-mmap-events -C 1 2>/dev/null | grep MMAP | wc -l`
+	mmap_cnt=$(perf script -i "${perfdatafile}" --no-itrace --show-mmap-events -C 1 2>/dev/null | grep -c MMAP)
 
-	if [ ${mmap_cnt} -gt 0 ] ; then
+	if [ "${mmap_cnt}" -gt 0 ] ; then
+		echo OK
 		return 0
 	fi
 
@@ -40,25 +124,533 @@ test_system_wide_side_band()
 	return 1
 }
 
+can_kernel()
+{
+	if [ -z "${can_kernel_trace}" ] ; then
+		can_kernel_trace=0
+		perf_record_no_decode -o "${tmpfile}" -e dummy:k true >/dev/null 2>&1 && can_kernel_trace=1
+	fi
+	if [ ${can_kernel_trace} -eq 0 ] ; then
+		echo "SKIP: no kernel tracing"
+		return 2
+	fi
+	return 0
+}
+
+test_per_thread()
+{
+	k="$1"
+	desc="$2"
+
+	echo "--- Test per-thread ${desc}recording ---"
+
+	if ! $have_workload ; then
+		echo "No workload, so skipping"
+		return 2
+	fi
+
+	if [ "${k}" = "k" ] ; then
+		can_kernel || return 2
+	fi
+
+	cat <<- "_end_of_file_" > "${awkscript}"
+	BEGIN {
+		s = "[ ]*"
+		u = s"[0-9]+"s
+		d = s"[0-9-]+"s
+		x = s"[0-9a-fA-FxX]+"s
+		mmapping = "idx"u": mmapping fd"u
+		set_output = "idx"u": set output fd"u"->"u
+		perf_event_open = "sys_perf_event_open: pid"d"cpu"d"group_fd"d"flags"x"="u
+	}
+
+	/perf record opening and mmapping events/ {
+		if (!done)
+			active = 1
+	}
+
+	/perf record done opening and mmapping events/ {
+		active = 0
+		done = 1
+	}
+
+	$0 ~ perf_event_open && active {
+		match($0, perf_event_open)
+		$0 = substr($0, RSTART, RLENGTH)
+		pid = $3
+		cpu = $5
+		fd = $11
+		print "pid " pid " cpu " cpu " fd " fd " : " $0
+		fd_array[fd] = fd
+		pid_array[fd] = pid
+		cpu_array[fd] = cpu
+	}
+
+	$0 ~ mmapping && active  {
+		match($0, mmapping)
+		$0 = substr($0, RSTART, RLENGTH)
+		fd = $5
+		print "fd " fd " : " $0
+		if (fd in fd_array) {
+			mmap_array[fd] = 1
+		} else {
+			print "Unknown fd " fd
+			exit 1
+		}
+	}
+
+	$0 ~ set_output && active {
+		match($0, set_output)
+		$0 = substr($0, RSTART, RLENGTH)
+		fd = $6
+		fd_to = $8
+		print "fd " fd " fd_to " fd_to " : " $0
+		if (fd in fd_array) {
+			if (fd_to in fd_array) {
+				set_output_array[fd] = fd_to
+			} else {
+				print "Unknown fd " fd_to
+				exit 1
+			}
+		} else {
+			print "Unknown fd " fd
+			exit 1
+		}
+	}
+
+	END {
+		print "Checking " length(fd_array) " fds"
+		for (fd in fd_array) {
+			if (fd in mmap_array) {
+				pid = pid_array[fd]
+				if (pid != -1) {
+					if (pid in pids) {
+						print "More than 1 mmap for PID " pid
+						exit 1
+					}
+					pids[pid] = 1
+				}
+				cpu = cpu_array[fd]
+				if (cpu != -1) {
+					if (cpu in cpus) {
+						print "More than 1 mmap for CPU " cpu
+						exit 1
+					}
+					cpus[cpu] = 1
+				}
+			} else if (!(fd in set_output_array)) {
+				print "No mmap for fd " fd
+				exit 1
+			}
+		}
+		n = length(pids)
+		if (n != thread_cnt) {
+			print "Expected " thread_cnt " per-thread mmaps - found " n
+			exit 1
+		}
+	}
+	_end_of_file_
+
+	$workload &
+	w1=$!
+	$workload &
+	w2=$!
+	echo "Workload PIDs are $w1 and $w2"
+	wait_for_threads ${w1} 2
+	wait_for_threads ${w2} 2
+
+	perf_record_no_decode -o "${perfdatafile}" -e intel_pt//u"${k}" -vvv --per-thread -p "${w1},${w2}" 2>"${errfile}" >"${outfile}" &
+	ppid=$!
+	echo "perf PID is $ppid"
+	wait_for_perf_to_start ${ppid} "${errfile}" || return 1
+
+	kill ${w1}
+	wait_for_process_to_exit ${w1} || return 1
+	is_running ${ppid} || return 1
+
+	kill ${w2}
+	wait_for_process_to_exit ${w2} || return 1
+	wait_for_process_to_exit ${ppid} || return 1
+
+	awk -v thread_cnt=4 -f "${awkscript}" "${errfile}" || return 1
+
+	echo OK
+	return 0
+}
+
+test_jitdump()
+{
+	echo "--- Test tracing self-modifying code that uses jitdump ---"
+
+	script_path=$(realpath "$0")
+	script_dir=$(dirname "$script_path")
+	jitdump_incl_dir="${script_dir}/../../util"
+	jitdump_h="${jitdump_incl_dir}/jitdump.h"
+
+	if [ ! -e "${jitdump_h}" ] ; then
+		echo "SKIP: Include file jitdump.h not found"
+		return 2
+	fi
+
+	if [ -z "${have_jitdump_workload}" ] ; then
+		have_jitdump_workload=false
+		# Create a workload that uses self-modifying code and generates its own jitdump file
+		cat <<- "_end_of_file_" | /usr/bin/cc -o "${jitdump_workload}" -I "${jitdump_incl_dir}" -xc - -pthread && have_jitdump_workload=true
+		#define _GNU_SOURCE
+		#include <sys/mman.h>
+		#include <sys/types.h>
+		#include <stddef.h>
+		#include <stdio.h>
+		#include <stdint.h>
+		#include <unistd.h>
+		#include <string.h>
+
+		#include "jitdump.h"
+
+		#define CHK_BYTE 0x5a
+
+		static inline uint64_t rdtsc(void)
+		{
+			unsigned int low, high;
+
+			asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+			return low | ((uint64_t)high) << 32;
+		}
+
+		static FILE *open_jitdump(void)
+		{
+			struct jitheader header = {
+				.magic      = JITHEADER_MAGIC,
+				.version    = JITHEADER_VERSION,
+				.total_size = sizeof(header),
+				.pid        = getpid(),
+				.timestamp  = rdtsc(),
+				.flags      = JITDUMP_FLAGS_ARCH_TIMESTAMP,
+			};
+			char filename[256];
+			FILE *f;
+			void *m;
+
+			snprintf(filename, sizeof(filename), "jit-%d.dump", getpid());
+			f = fopen(filename, "w+");
+			if (!f)
+				goto err;
+			/* Create an MMAP event for the jitdump file. That is how perf tool finds it. */
+			m = mmap(0, 4096, PROT_READ | PROT_EXEC, MAP_PRIVATE, fileno(f), 0);
+			if (m == MAP_FAILED)
+				goto err_close;
+			munmap(m, 4096);
+			if (fwrite(&header,sizeof(header),1,f) != 1)
+				goto err_close;
+			return f;
+
+		err_close:
+			fclose(f);
+		err:
+			return NULL;
+		}
+
+		static int write_jitdump(FILE *f, void *addr, const uint8_t *dat, size_t sz, uint64_t *idx)
+		{
+			struct jr_code_load rec = {
+				.p.id          = JIT_CODE_LOAD,
+				.p.total_size  = sizeof(rec) + sz,
+				.p.timestamp   = rdtsc(),
+				.pid	       = getpid(),
+				.tid	       = gettid(),
+				.vma           = (unsigned long)addr,
+				.code_addr     = (unsigned long)addr,
+				.code_size     = sz,
+				.code_index    = ++*idx,
+			};
+
+			if (fwrite(&rec,sizeof(rec),1,f) != 1 ||
+			fwrite(dat, sz, 1, f) != 1)
+				return -1;
+			return 0;
+		}
+
+		static void close_jitdump(FILE *f)
+		{
+			fclose(f);
+		}
+
+		int main()
+		{
+			/* Get a memory page to store executable code */
+			void *addr = mmap(0, 4096, PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+			/* Code to execute: mov CHK_BYTE, %eax ; ret */
+			uint8_t dat[] = {0xb8, CHK_BYTE, 0x00, 0x00, 0x00, 0xc3};
+			FILE *f = open_jitdump();
+			uint64_t idx = 0;
+			int ret = 1;
+
+			if (!f)
+				return 1;
+			/* Copy executable code to executable memory page */
+			memcpy(addr, dat, sizeof(dat));
+			/* Record it in the jitdump file */
+			if (write_jitdump(f, addr, dat, sizeof(dat), &idx))
+				goto out_close;
+			/* Call it */
+			ret = ((int (*)(void))addr)() - CHK_BYTE;
+		out_close:
+			close_jitdump(f);
+			return ret;
+		}
+		_end_of_file_
+	fi
+
+	if ! $have_jitdump_workload ; then
+		echo "SKIP: No jitdump workload"
+		return 2
+	fi
+
+	# Change to temp_dir so jitdump collateral files go there
+	cd "${temp_dir}"
+	perf_record_no_bpf -o "${tmpfile}" -e intel_pt//u "${jitdump_workload}"
+	perf inject -i "${tmpfile}" -o "${perfdatafile}" --jit
+	decode_br_cnt=$(perf script -i "${perfdatafile}" --itrace=b | wc -l)
+	# Note that overflow and lost errors are suppressed for the error count
+	decode_err_cnt=$(perf script -i "${perfdatafile}" --itrace=e-o-l | grep -ci error)
+	cd -
+	# Should be thousands of branches
+	if [ "${decode_br_cnt}" -lt 1000 ] ; then
+		echo "Decode failed, only ${decode_br_cnt} branches"
+		return 1
+	fi
+	# Should be no errors
+	if [ "${decode_err_cnt}" -ne 0 ] ; then
+		echo "Decode failed, ${decode_err_cnt} errors"
+		perf script -i "${perfdatafile}" --itrace=e-o-l --show-mmap-events | cat
+		return 1
+	fi
+
+	echo OK
+	return 0
+}
+
+test_packet_filter()
+{
+	echo "--- Test with MTC and TSC disabled ---"
+	# Disable MTC and TSC
+	perf_record_no_decode -o "${perfdatafile}" -e intel_pt/mtc=0,tsc=0/u uname
+	# Should not get MTC packet
+	mtc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "MTC 0x")
+	if [ "${mtc_cnt}" -ne 0 ] ; then
+		echo "Failed to filter with mtc=0"
+		return 1
+	fi
+	# Should not get TSC package
+	tsc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "TSC 0x")
+	if [ "${tsc_cnt}" -ne 0 ] ; then
+		echo "Failed to filter with tsc=0"
+		return 1
+	fi
+	echo OK
+	return 0
+}
+
+test_disable_branch()
+{
+	echo "--- Test with branches disabled ---"
+	# Disable branch
+	perf_record_no_decode -o "${perfdatafile}" -e intel_pt/branch=0/u uname
+	# Should not get branch related packets
+	tnt_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "TNT 0x")
+	tip_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "TIP 0x")
+	fup_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "FUP 0x")
+	if [ "${tnt_cnt}" -ne 0 ] || [ "${tip_cnt}" -ne 0 ] || [ "${fup_cnt}" -ne 0 ] ; then
+		echo "Failed to disable branches"
+		return 1
+	fi
+	echo OK
+	return 0
+}
+
+test_time_cyc()
+{
+	echo "--- Test with/without CYC ---"
+	# Check if CYC is supported
+	cyc=$(cat /sys/bus/event_source/devices/intel_pt/caps/psb_cyc)
+	if [ "${cyc}" != "1" ] ; then
+		echo "SKIP: CYC is not supported"
+		return 2
+	fi
+	# Enable CYC
+	perf_record_no_decode -o "${perfdatafile}" -e intel_pt/cyc/u uname
+	# should get CYC packets
+	cyc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "CYC 0x")
+	if [ "${cyc_cnt}" = "0" ] ; then
+		echo "Failed to get CYC packet"
+		return 1
+	fi
+	# Without CYC
+	perf_record_no_decode -o "${perfdatafile}" -e intel_pt//u uname
+	# Should not get CYC packets
+	cyc_cnt=$(perf script -i "${perfdatafile}" -D 2>/dev/null | grep -c "CYC 0x")
+	if [ "${cyc_cnt}" -gt 0 ] ; then
+		echo "Still get CYC packet without cyc"
+		return 1
+	fi
+	echo OK
+	return 0
+}
+
+test_sample()
+{
+	echo "--- Test recording with sample mode ---"
+	# Check if recording with sample mode is working
+	if ! perf_record_no_decode -o "${perfdatafile}" --aux-sample=8192 -e '{intel_pt//u,branch-misses:u}' uname ; then
+		echo "perf record failed with --aux-sample"
+		return 1
+	fi
+	echo OK
+	return 0
+}
+
+test_kernel_trace()
+{
+	echo "--- Test with kernel trace ---"
+	# Check if recording with kernel trace is working
+	can_kernel || return 2
+	if ! perf_record_no_decode -o "${perfdatafile}" -e intel_pt//k -m1,128 uname ; then
+		echo "perf record failed with intel_pt//k"
+		return 1
+	fi
+	echo OK
+	return 0
+}
+
+test_virtual_lbr()
+{
+	echo "--- Test virtual LBR ---"
+	# Check if python script is supported
+	libpython=$(perf version --build-options | grep python | grep -cv OFF)
+	if [ "${libpython}" != "1" ] ; then
+		echo "SKIP: python scripting is not supported"
+		return 2
+	fi
+
+	# Python script to determine the maximum size of branch stacks
+	cat << "_end_of_file_" > "${maxbrstack}"
+from __future__ import print_function
+
+bmax = 0
+
+def process_event(param_dict):
+	if "brstack" in param_dict:
+		brstack = param_dict["brstack"]
+		n = len(brstack)
+		global bmax
+		if n > bmax:
+			bmax = n
+
+def trace_end():
+	print("max brstack", bmax)
+_end_of_file_
+
+	# Check if virtual lbr is working
+	perf_record_no_bpf -o "${perfdatafile}" --aux-sample -e '{intel_pt//,cycles}:u' uname
+	times_val=$(perf script -i "${perfdatafile}" --itrace=L -s "${maxbrstack}" 2>/dev/null | grep "max brstack " | cut -d " " -f 3)
+	case "${times_val}" in
+		[0-9]*)	;;
+		*)	times_val=0;;
+	esac
+	if [ "${times_val}" -lt 2 ] ; then
+		echo "Failed with virtual lbr"
+		return 1
+	fi
+	echo OK
+	return 0
+}
+
+test_power_event()
+{
+	echo "--- Test power events ---"
+	# Check if power events are supported
+	power_event=$(cat /sys/bus/event_source/devices/intel_pt/caps/power_event_trace)
+	if [ "${power_event}" != "1" ] ; then
+		echo "SKIP: power_event_trace is not supported"
+		return 2
+	fi
+	if ! perf_record_no_decode -o "${perfdatafile}" -a -e intel_pt/pwr_evt/u uname ; then
+		echo "perf record failed with pwr_evt"
+		return 1
+	fi
+	echo OK
+	return 0
+}
+
+test_no_tnt()
+{
+	echo "--- Test with TNT packets disabled  ---"
+	# Check if TNT disable is supported
+	notnt=$(cat /sys/bus/event_source/devices/intel_pt/caps/tnt_disable)
+	if [ "${notnt}" != "1" ] ; then
+		echo "SKIP: tnt_disable is not supported"
+		return 2
+	fi
+	perf_record_no_decode -o "${perfdatafile}" -e intel_pt/notnt/u uname
+	# Should be no TNT packets
+	tnt_cnt=$(perf script -i "${perfdatafile}" -D | grep -c TNT)
+	if [ "${tnt_cnt}" -ne 0 ] ; then
+		echo "TNT packets still there after notnt"
+		return 1
+	fi
+	echo OK
+	return 0
+}
+
+test_event_trace()
+{
+	echo "--- Test with event_trace ---"
+	# Check if event_trace is supported
+	event_trace=$(cat /sys/bus/event_source/devices/intel_pt/caps/event_trace)
+	if [ "${event_trace}" != 1 ] ; then
+		echo "SKIP: event_trace is not supported"
+		return 2
+	fi
+	if ! perf_record_no_decode -o "${perfdatafile}" -e intel_pt/event/u uname ; then
+		echo "perf record failed with event trace"
+		return 1
+	fi
+	echo OK
+	return 0
+}
+
 count_result()
 {
-	if [ $1 -eq 2 ] ; then
-		skip_cnt=`expr ${skip_cnt} \+ 1`
+	if [ "$1" -eq 2 ] ; then
+		skip_cnt=$((skip_cnt + 1))
 		return
 	fi
-	if [ $1 -eq 0 ] ; then
-		ok_cnt=`expr ${ok_cnt} \+ 1`
+	if [ "$1" -eq 0 ] ; then
+		ok_cnt=$((ok_cnt + 1))
 		return
 	fi
-	err_cnt=`expr ${err_cnt} \+ 1`
+	err_cnt=$((err_cnt + 1))
 }
 
-test_system_wide_side_band
+ret=0
+test_system_wide_side_band		|| ret=$? ; count_result $ret ; ret=0
+test_per_thread "" ""			|| ret=$? ; count_result $ret ; ret=0
+test_per_thread "k" "(incl. kernel) "	|| ret=$? ; count_result $ret ; ret=0
+test_jitdump				|| ret=$? ; count_result $ret ; ret=0
+test_packet_filter			|| ret=$? ; count_result $ret ; ret=0
+test_disable_branch			|| ret=$? ; count_result $ret ; ret=0
+test_time_cyc				|| ret=$? ; count_result $ret ; ret=0
+test_sample				|| ret=$? ; count_result $ret ; ret=0
+test_kernel_trace			|| ret=$? ; count_result $ret ; ret=0
+test_virtual_lbr			|| ret=$? ; count_result $ret ; ret=0
+test_power_event			|| ret=$? ; count_result $ret ; ret=0
+test_no_tnt				|| ret=$? ; count_result $ret ; ret=0
+test_event_trace			|| ret=$? ; count_result $ret ; ret=0
 
-count_result $?
+cleanup
 
-rm -f ${tmpfile}
-rm -f ${perfdatafile}
+echo "--- Done ---"
 
 if [ ${err_cnt} -gt 0 ] ; then
 	exit 1
diff --git a/tools/perf/tests/shell/test_java_symbol.sh b/tools/perf/tests/shell/test_java_symbol.sh
new file mode 100755
index 000000000000..f221225808a3
--- /dev/null
+++ b/tools/perf/tests/shell/test_java_symbol.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Test java symbol
+
+# SPDX-License-Identifier: GPL-2.0
+# Leo Yan <leo.yan@linaro.org>, 2022
+
+# skip if there's no jshell
+if ! [ -x "$(command -v jshell)" ]; then
+	echo "skip: no jshell, install JDK"
+	exit 2
+fi
+
+PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+PERF_INJ_DATA=$(mktemp /tmp/__perf_test.perf.data.inj.XXXXX)
+
+cleanup_files()
+{
+	echo "Cleaning up files..."
+	rm -f ${PERF_DATA}
+	rm -f ${PERF_INJ_DATA}
+}
+
+trap cleanup_files exit term int
+
+if [ -e "$PWD/tools/perf/libperf-jvmti.so" ]; then
+	LIBJVMTI=$PWD/tools/perf/libperf-jvmti.so
+elif [ -e "$PWD/libperf-jvmti.so" ]; then
+	LIBJVMTI=$PWD/libperf-jvmti.so
+elif [ -e "$PREFIX/lib64/libperf-jvmti.so" ]; then
+	LIBJVMTI=$PREFIX/lib64/libperf-jvmti.so
+elif [ -e "$PREFIX/lib/libperf-jvmti.so" ]; then
+	LIBJVMTI=$PREFIX/lib/libperf-jvmti.so
+elif [ -e "/usr/lib/linux-tools-$(uname -a | awk '{ print $3 }' | sed -r 's/-generic//')/libperf-jvmti.so" ]; then
+	LIBJVMTI=/usr/lib/linux-tools-$(uname -a | awk '{ print $3 }' | sed -r 's/-generic//')/libperf-jvmti.so
+else
+	echo "Fail to find libperf-jvmti.so"
+	# JVMTI is a build option, skip the test if fail to find lib
+	exit 2
+fi
+
+cat <<EOF | perf record -k 1 -o $PERF_DATA jshell -s -J-agentpath:$LIBJVMTI
+int fib(int x) {
+	return x > 1 ? fib(x - 2) + fib(x - 1) : 1;
+}
+
+int q = 0;
+
+for (int i = 0; i < 10; i++)
+	q += fib(i);
+
+System.out.println(q);
+EOF
+
+if [ $? -ne 0 ]; then
+	echo "Fail to record for java program"
+	exit 1
+fi
+
+if ! perf inject -i $PERF_DATA -o $PERF_INJ_DATA -j; then
+	echo "Fail to inject samples"
+	exit 1
+fi
+
+# Below is an example of the instruction samples reporting:
+#   8.18%  jshell           jitted-50116-29.so    [.] Interpreter
+#   0.75%  Thread-1         jitted-83602-1670.so  [.] jdk.internal.jimage.BasicImageReader.getString(int)
+perf report --stdio -i ${PERF_INJ_DATA} 2>&1 | \
+	egrep " +[0-9]+\.[0-9]+% .* (Interpreter|jdk\.internal).*" > /dev/null 2>&1
+
+if [ $? -ne 0 ]; then
+	echo "Fail to find java symbols"
+	exit 1
+fi
+
+exit 0
diff --git a/tools/perf/tests/sigtrap.c b/tools/perf/tests/sigtrap.c
index e32ece90e164..1de7478ec189 100644
--- a/tools/perf/tests/sigtrap.c
+++ b/tools/perf/tests/sigtrap.c
@@ -54,6 +54,63 @@ static struct perf_event_attr make_event_attr(void)
 	return attr;
 }
 
+#ifdef HAVE_BPF_SKEL
+#include <bpf/btf.h>
+
+static bool attr_has_sigtrap(void)
+{
+	bool ret = false;
+	struct btf *btf;
+	const struct btf_type *t;
+	const struct btf_member *m;
+	const char *name;
+	int i, id;
+
+	btf = btf__load_vmlinux_btf();
+	if (btf == NULL) {
+		/* should be an old kernel */
+		return false;
+	}
+
+	id = btf__find_by_name_kind(btf, "perf_event_attr", BTF_KIND_STRUCT);
+	if (id < 0)
+		goto out;
+
+	t = btf__type_by_id(btf, id);
+	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
+		name = btf__name_by_offset(btf, m->name_off);
+		if (!strcmp(name, "sigtrap")) {
+			ret = true;
+			break;
+		}
+	}
+out:
+	btf__free(btf);
+	return ret;
+}
+#else  /* !HAVE_BPF_SKEL */
+static bool attr_has_sigtrap(void)
+{
+	struct perf_event_attr attr = {
+		.type		= PERF_TYPE_SOFTWARE,
+		.config		= PERF_COUNT_SW_DUMMY,
+		.size		= sizeof(attr),
+		.remove_on_exec = 1, /* Required by sigtrap. */
+		.sigtrap	= 1, /* Request synchronous SIGTRAP on event. */
+	};
+	int fd;
+	bool ret = false;
+
+	fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag());
+	if (fd >= 0) {
+		ret = true;
+		close(fd);
+	}
+
+	return ret;
+}
+#endif  /* HAVE_BPF_SKEL */
+
 static void
 sigtrap_handler(int signum __maybe_unused, siginfo_t *info, void *ucontext __maybe_unused)
 {
@@ -139,7 +196,13 @@ static int test__sigtrap(struct test_suite *test __maybe_unused, int subtest __m
 
 	fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag());
 	if (fd < 0) {
-		pr_debug("FAILED sys_perf_event_open(): %s\n", str_error_r(errno, sbuf, sizeof(sbuf)));
+		if (attr_has_sigtrap()) {
+			pr_debug("FAILED sys_perf_event_open(): %s\n",
+				 str_error_r(errno, sbuf, sizeof(sbuf)));
+		} else {
+			pr_debug("perf_event_attr doesn't have sigtrap\n");
+			ret = TEST_SKIP;
+		}
 		goto out_restore_sigaction;
 	}
 
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 2d46af9ef935..87f565c7f650 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -6,6 +6,7 @@
 #include <time.h>
 #include <stdlib.h>
 #include <linux/zalloc.h>
+#include <linux/err.h>
 #include <perf/cpumap.h>
 #include <perf/evlist.h>
 #include <perf/mmap.h>
@@ -398,19 +399,13 @@ static int test__switch_tracking(struct test_suite *test __maybe_unused, int sub
 		goto out;
 	}
 
-	err = parse_event(evlist, sched_switch);
-	if (err) {
-		pr_debug("Failed to parse event %s\n", sched_switch);
+	switch_evsel = evlist__add_sched_switch(evlist, true);
+	if (IS_ERR(switch_evsel)) {
+		err = PTR_ERR(switch_evsel);
+		pr_debug("Failed to create event %s\n", sched_switch);
 		goto out_err;
 	}
 
-	switch_evsel = evlist__last(evlist);
-
-	evsel__set_sample_bit(switch_evsel, CPU);
-	evsel__set_sample_bit(switch_evsel, TIME);
-
-	switch_evsel->core.system_wide = true;
-	switch_evsel->no_aux_samples = true;
 	switch_evsel->immediate = true;
 
 	/* Test moving an event to the front */
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 0b4f61b6cc6b..c4630cfc80ea 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -147,7 +147,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		TEST_ASSERT_VAL("Cpu map - Die ID doesn't match",
 			session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
 		TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1);
-		TEST_ASSERT_VAL("Cpu map - Thread is set", id.thread == -1);
+		TEST_ASSERT_VAL("Cpu map - Thread IDX is set", id.thread_idx == -1);
 	}
 
 	// Test that core ID contains socket, die and core
@@ -163,7 +163,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		TEST_ASSERT_VAL("Core map - Die ID doesn't match",
 			session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die);
 		TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1);
-		TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1);
+		TEST_ASSERT_VAL("Core map - Thread IDX is set", id.thread_idx == -1);
 	}
 
 	// Test that die ID contains socket and die
@@ -179,7 +179,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1);
 		TEST_ASSERT_VAL("Die map - Core is set", id.core == -1);
 		TEST_ASSERT_VAL("Die map - CPU is set", id.cpu.cpu == -1);
-		TEST_ASSERT_VAL("Die map - Thread is set", id.thread == -1);
+		TEST_ASSERT_VAL("Die map - Thread IDX is set", id.thread_idx == -1);
 	}
 
 	// Test that socket ID contains only socket
@@ -193,7 +193,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1);
 		TEST_ASSERT_VAL("Socket map - Core is set", id.core == -1);
 		TEST_ASSERT_VAL("Socket map - CPU is set", id.cpu.cpu == -1);
-		TEST_ASSERT_VAL("Socket map - Thread is set", id.thread == -1);
+		TEST_ASSERT_VAL("Socket map - Thread IDX is set", id.thread_idx == -1);
 	}
 
 	// Test that node ID contains only node
@@ -205,7 +205,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 		TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1);
 		TEST_ASSERT_VAL("Node map - Core is set", id.core == -1);
 		TEST_ASSERT_VAL("Node map - CPU is set", id.cpu.cpu == -1);
-		TEST_ASSERT_VAL("Node map - Thread is set", id.thread == -1);
+		TEST_ASSERT_VAL("Node map - Thread IDX is set", id.thread_idx == -1);
 	}
 	perf_session__delete(session);
 
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 4fd8d703ff19..8ab035b55875 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -43,10 +43,11 @@ static bool is_ignored_symbol(const char *name, char type)
 	/* Symbol names that begin with the following are ignored.*/
 	static const char * const ignored_prefixes[] = {
 		"$",			/* local symbols for ARM, MIPS, etc. */
-		".LASANPC",		/* s390 kasan local symbols */
+		".L",			/* local labels, .LBB,.Ltmpxxx,.L__unnamed_xx,.LASANPC, etc. */
 		"__crc_",		/* modversions */
 		"__efistub_",		/* arm64 EFI stub namespace */
-		"__kvm_nvhe_",		/* arm64 non-VHE KVM namespace */
+		"__kvm_nvhe_$",		/* arm64 local symbols in non-VHE KVM namespace */
+		"__kvm_nvhe_.L",	/* arm64 local symbols in non-VHE KVM namespace */
 		"__AArch64ADRPThunk_",	/* arm64 lld */
 		"__ARMV5PILongThunk_",	/* arm lld */
 		"__ARMV7PILongThunk_",
diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c
index 9d4c45184e71..56455da30341 100644
--- a/tools/perf/tests/wp.c
+++ b/tools/perf/tests/wp.c
@@ -2,7 +2,9 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <errno.h>
 #include <sys/ioctl.h>
+#include <linux/compiler.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/kernel.h>
 #include "tests.h"
@@ -137,8 +139,7 @@ static int test__wp_rw(struct test_suite *test __maybe_unused,
 #endif
 }
 
-static int test__wp_modify(struct test_suite *test __maybe_unused,
-			   int subtest __maybe_unused)
+static int test__wp_modify(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
 #if defined(__s390x__)
 	return TEST_SKIP;
@@ -160,6 +161,11 @@ static int test__wp_modify(struct test_suite *test __maybe_unused,
 	new_attr.disabled = 1;
 	ret = ioctl(fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr);
 	if (ret < 0) {
+		if (errno == ENOTTY) {
+			test->test_cases[subtest].skip_reason = "missing kernel support";
+			ret = TEST_SKIP;
+		}
+
 		pr_debug("ioctl(PERF_EVENT_IOC_MODIFY_ATTRIBUTES) failed\n");
 		close(fd);
 		return ret;