13 files changed, 418 insertions, 125 deletions
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
index d1ebb5561e5b..6f921db33cf9 100644
--- a/tools/perf/tests/bp_account.c
+++ b/tools/perf/tests/bp_account.c
@@ -151,11 +151,21 @@ static int detect_ioctl(void)
 static int detect_share(int wp_cnt, int bp_cnt)
 {
 	struct perf_event_attr attr;
-	int i, fd[wp_cnt + bp_cnt], ret;
+	int i, *fd = NULL, ret = -1;
+
+	if (wp_cnt + bp_cnt == 0)
+		return 0;
+
+	fd = malloc(sizeof(int) * (wp_cnt + bp_cnt));
+	if (!fd)
+		return -1;
 
 	for (i = 0; i < wp_cnt; i++) {
 		fd[i] = wp_event((void *)&the_var, &attr);
-		TEST_ASSERT_VAL("failed to create wp\n", fd[i] != -1);
+		if (fd[i] == -1) {
+			pr_err("failed to create wp\n");
+			goto out;
+		}
 	}
 
 	for (; i < (bp_cnt + wp_cnt); i++) {
@@ -166,9 +176,11 @@ static int detect_share(int wp_cnt, int bp_cnt)
 
 	ret = i != (bp_cnt + wp_cnt);
 
+out:
 	while (i--)
 		close(fd[i]);
 
+	free(fd);
 	return ret;
 }
 
diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c
index ab4b98b3165d..7981c69ed1b4 100644
--- a/tools/perf/tests/bpf-script-example.c
+++ b/tools/perf/tests/bpf-script-example.c
@@ -17,20 +17,31 @@ static void *(*bpf_map_lookup_elem)(void *map, void *key) =
 static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) =
 	(void *) BPF_FUNC_map_update_elem;
 
-struct bpf_map_def {
-	unsigned int type;
-	unsigned int key_size;
-	unsigned int value_size;
-	unsigned int max_entries;
-};
+/*
+ * Following macros are taken from tools/lib/bpf/bpf_helpers.h,
+ * and are used to create BTF defined maps. It is easier to take
+ * 2 simple macros, than being able to include above header in
+ * runtime.
+ *
+ * __uint - defines integer attribute of BTF map definition,
+ * Such attributes are represented using a pointer to an array,
+ * in which dimensionality of array encodes specified integer
+ * value.
+ *
+ * __type - defines pointer variable with typeof(val) type for
+ * attributes like key or value, which will be defined by the
+ * size of the type.
+ */
+#define __uint(name, val) int (*name)[val]
+#define __type(name, val) typeof(val) *name
 
 #define SEC(NAME) __attribute__((section(NAME), used))
-struct bpf_map_def SEC("maps") flip_table = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} flip_table SEC(".maps");
 
 SEC("func=do_epoll_wait")
 int bpf_func__SyS_epoll_pwait(void *ctx)
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index d54c5371c6a6..2efe9e3a63b8 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "util/debug.h"
 #include "util/expr.h"
+#include "util/header.h"
 #include "util/smt.h"
 #include "tests.h"
+#include <math.h>
 #include <stdlib.h>
 #include <string.h>
 #include <linux/zalloc.h>
@@ -69,6 +71,11 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
 	double val, num_cpus, num_cores, num_dies, num_packages;
 	int ret;
 	struct expr_parse_ctx *ctx;
+	bool is_intel = false;
+	char buf[128];
+
+	if (!get_cpuid(buf, sizeof(buf)))
+		is_intel = strstr(buf, "Intel") != NULL;
 
 	TEST_ASSERT_EQUAL("ids_union", test_ids_union(), 0);
 
@@ -97,6 +104,8 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
 	ret |= test(ctx, "2.2 > 2.2", 0);
 	ret |= test(ctx, "2.2 < 1.1", 0);
 	ret |= test(ctx, "1.1 > 2.2", 0);
+	ret |= test(ctx, "1.1e10 < 1.1e100", 1);
+	ret |= test(ctx, "1.1e2 > 1.1e-2", 1);
 
 	if (ret) {
 		expr__ctx_free(ctx);
@@ -173,6 +182,12 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u
 	if (num_dies) // Some platforms do not have CPU die support, for example s390
 		TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages);
 
+	TEST_ASSERT_VAL("#system_tsc_freq", expr__parse(&val, ctx, "#system_tsc_freq") == 0);
+	if (is_intel)
+		TEST_ASSERT_VAL("#system_tsc_freq > 0", val > 0);
+	else
+		TEST_ASSERT_VAL("#system_tsc_freq == 0", fpclassify(val) == FP_ZERO);
+
 	/*
 	 * Source count returns the number of events aggregating in a leader
 	 * event including the leader. Check parsing yields an id.
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 30bbe144648a..dfb6173b2a82 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -170,14 +170,139 @@ out_free_threads:
 	return err;
 }
 
+static int test_stat_user_read(int event)
+{
+	struct perf_counts_values counts = { .val = 0 };
+	struct perf_thread_map *threads;
+	struct perf_evsel *evsel;
+	struct perf_event_mmap_page *pc;
+	struct perf_event_attr attr = {
+		.type	= PERF_TYPE_HARDWARE,
+		.config	= event,
+#ifdef __aarch64__
+		.config1 = 0x2,		/* Request user access */
+#endif
+	};
+	int err, i, ret = TEST_FAIL;
+	bool opened = false, mapped = false;
+
+	threads = perf_thread_map__new_dummy();
+	TEST_ASSERT_VAL("failed to create threads", threads);
+
+	perf_thread_map__set_pid(threads, 0, 0);
+
+	evsel = perf_evsel__new(&attr);
+	TEST_ASSERT_VAL("failed to create evsel", evsel);
+
+	err = perf_evsel__open(evsel, NULL, threads);
+	if (err) {
+		pr_err("failed to open evsel: %s\n", strerror(-err));
+		ret = TEST_SKIP;
+		goto out;
+	}
+	opened = true;
+
+	err = perf_evsel__mmap(evsel, 0);
+	if (err) {
+		pr_err("failed to mmap evsel: %s\n", strerror(-err));
+		goto out;
+	}
+	mapped = true;
+
+	pc = perf_evsel__mmap_base(evsel, 0, 0);
+	if (!pc) {
+		pr_err("failed to get mmapped address\n");
+		goto out;
+	}
+
+	if (!pc->cap_user_rdpmc || !pc->index) {
+		pr_err("userspace counter access not %s\n",
+			!pc->cap_user_rdpmc ? "supported" : "enabled");
+		ret = TEST_SKIP;
+		goto out;
+	}
+	if (pc->pmc_width < 32) {
+		pr_err("userspace counter width not set (%d)\n", pc->pmc_width);
+		goto out;
+	}
+
+	perf_evsel__read(evsel, 0, 0, &counts);
+	if (counts.val == 0) {
+		pr_err("failed to read value for evsel\n");
+		goto out;
+	}
+
+	for (i = 0; i < 5; i++) {
+		volatile int count = 0x10000 << i;
+		__u64 start, end, last = 0;
+
+		pr_debug("\tloop = %u, ", count);
+
+		perf_evsel__read(evsel, 0, 0, &counts);
+		start = counts.val;
+
+		while (count--) ;
+
+		perf_evsel__read(evsel, 0, 0, &counts);
+		end = counts.val;
+
+		if ((end - start) < last) {
+			pr_err("invalid counter data: end=%llu start=%llu last= %llu\n",
+				end, start, last);
+			goto out;
+		}
+		last = end - start;
+		pr_debug("count = %llu\n", end - start);
+	}
+	ret = TEST_OK;
+
+out:
+	if (mapped)
+		perf_evsel__munmap(evsel);
+	if (opened)
+		perf_evsel__close(evsel);
+	perf_evsel__delete(evsel);
+
+	perf_thread_map__put(threads);
+	return ret;
+}
+
+static int test__mmap_user_read_instr(struct test_suite *test __maybe_unused,
+				      int subtest __maybe_unused)
+{
+	return test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS);
+}
+
+static int test__mmap_user_read_cycles(struct test_suite *test __maybe_unused,
+				       int subtest __maybe_unused)
+{
+	return test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES);
+}
+
 static struct test_case tests__basic_mmap[] = {
 	TEST_CASE_REASON("Read samples using the mmap interface",
 			 basic_mmap,
 			 "permissions"),
+	TEST_CASE_REASON("User space counter reading of instructions",
+			 mmap_user_read_instr,
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+			 "permissions"
+#else
+			 "unsupported"
+#endif
+		),
+	TEST_CASE_REASON("User space counter reading of cycles",
+			 mmap_user_read_cycles,
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
+			 "permissions"
+#else
+			 "unsupported"
+#endif
+		),
 	{	.name = NULL, }
 };
 
 struct test_suite suite__basic_mmap = {
-	.desc = "Read samples using the mmap interface",
+	.desc = "mmap interface tests",
 	.test_cases = tests__basic_mmap,
 };
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index 4ad0dfbc8b21..7c7d20fc503a 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -20,8 +20,6 @@
 #include "tsc.h"
 #include "mmap.h"
 #include "tests.h"
-#include "pmu.h"
-#include "pmu-hybrid.h"
 
 /*
  * Except x86_64/i386 and Arm64, other archs don't support TSC in perf.  Just
@@ -106,28 +104,21 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
 
 	evlist__config(evlist, &opts, NULL);
 
-	evsel = evlist__first(evlist);
-
-	evsel->core.attr.comm = 1;
-	evsel->core.attr.disabled = 1;
-	evsel->core.attr.enable_on_exec = 0;
-
-	/*
-	 * For hybrid "cycles:u", it creates two events.
-	 * Init the second evsel here.
-	 */
-	if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) {
-		evsel = evsel__next(evsel);
+	/* For hybrid "cycles:u", it creates two events */
+	evlist__for_each_entry(evlist, evsel) {
 		evsel->core.attr.comm = 1;
 		evsel->core.attr.disabled = 1;
 		evsel->core.attr.enable_on_exec = 0;
 	}
 
-	if (evlist__open(evlist) == -ENOENT) {
-		err = TEST_SKIP;
+	ret = evlist__open(evlist);
+	if (ret < 0) {
+		if (ret == -ENOENT)
+			err = TEST_SKIP;
+		else
+			pr_debug("evlist__open() failed\n");
 		goto out_err;
 	}
-	CHECK__(evlist__open(evlist));
 
 	CHECK__(evlist__mmap(evlist, UINT_MAX));
 
@@ -167,10 +158,12 @@ static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int su
 				goto next_event;
 
 			if (strcmp(event->comm.comm, comm1) == 0) {
+				CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event));
 				CHECK__(evsel__parse_sample(evsel, event, &sample));
 				comm1_time = sample.time;
 			}
 			if (strcmp(event->comm.comm, comm2) == 0) {
+				CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event));
 				CHECK__(evsel__parse_sample(evsel, event, &sample));
 				comm2_time = sample.time;
 			}
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index f13368569d8b..263cbb67c861 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -812,6 +812,15 @@ static int check_parse_id(const char *id, struct parse_events_error *error,
 	for (cur = strchr(dup, '@') ; cur; cur = strchr(++cur, '@'))
 		*cur = '/';
 
+	if (fake_pmu) {
+		/*
+		 * Every call to __parse_events will try to initialize the PMU
+		 * state from sysfs and then clean it up at the end. Reset the
+		 * PMU events to the test state so that we don't pick up
+		 * erroneous prefixes and suffixes.
+		 */
+		perf_pmu__test_parse_init();
+	}
 	ret = __parse_events(evlist, dup, error, fake_pmu);
 	free(dup);
 
@@ -1115,6 +1124,7 @@ static int test__parsing_fake(struct test_suite *test __maybe_unused,
 				break;
 			if (!pe->metric_expr)
 				continue;
+			pr_debug("Found metric '%s' for '%s'\n", pe->metric_name, map->cpuid);
 			err = metric_parse_fake(pe->metric_expr);
 			if (err)
 				return err;
diff --git a/tools/perf/tests/shell/lib/perf_csv_output_lint.py b/tools/perf/tests/shell/lib/perf_csv_output_lint.py
deleted file mode 100644
index 714f283cfb1b..000000000000
--- a/tools/perf/tests/shell/lib/perf_csv_output_lint.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/python
-# SPDX-License-Identifier: GPL-2.0
-
-import argparse
-import sys
-
-# Basic sanity check of perf CSV output as specified in the man page.
-# Currently just checks the number of fields per line in output.
-
-ap = argparse.ArgumentParser()
-ap.add_argument('--no-args', action='store_true')
-ap.add_argument('--interval', action='store_true')
-ap.add_argument('--system-wide-no-aggr', action='store_true')
-ap.add_argument('--system-wide', action='store_true')
-ap.add_argument('--event', action='store_true')
-ap.add_argument('--per-core', action='store_true')
-ap.add_argument('--per-thread', action='store_true')
-ap.add_argument('--per-die', action='store_true')
-ap.add_argument('--per-node', action='store_true')
-ap.add_argument('--per-socket', action='store_true')
-ap.add_argument('--separator', default=',', nargs='?')
-args = ap.parse_args()
-
-Lines = sys.stdin.readlines()
-
-def check_csv_output(exp):
-  for line in Lines:
-    if 'failed' not in line:
-      count = line.count(args.separator)
-      if count != exp:
-        sys.stdout.write(''.join(Lines))
-        raise RuntimeError(f'wrong number of fields. expected {exp} in {line}')
-
-try:
-  if args.no_args or args.system_wide or args.event:
-    expected_items = 6
-  elif args.interval or args.per_thread or args.system_wide_no_aggr:
-    expected_items = 7
-  elif args.per_core or args.per_socket or args.per_node or args.per_die:
-    expected_items = 8
-  else:
-    ap.print_help()
-    raise RuntimeError('No checking option specified')
-  check_csv_output(expected_items)
-
-except:
-  sys.stdout.write('Test failed for input: ' + ''.join(Lines))
-  raise
diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh
index 983220ef3cb4..eb5196f58190 100755
--- a/tools/perf/tests/shell/stat+csv_output.sh
+++ b/tools/perf/tests/shell/stat+csv_output.sh
@@ -6,20 +6,42 @@
 
 set -e
 
-pythonchecker=$(dirname $0)/lib/perf_csv_output_lint.py
-if [ "x$PYTHON" == "x" ]
-then
-	if which python3 > /dev/null
-	then
-		PYTHON=python3
-	elif which python > /dev/null
-	then
-		PYTHON=python
-	else
-		echo Skipping test, python not detected please set environment variable PYTHON.
-		exit 2
-	fi
-fi
+function commachecker()
+{
+	local -i cnt=0
+	local exp=0
+
+	case "$1"
+	in "--no-args")		exp=6
+	;; "--system-wide")	exp=6
+	;; "--event")		exp=6
+	;; "--interval")	exp=7
+	;; "--per-thread")	exp=7
+	;; "--system-wide-no-aggr")	exp=7
+				[ $(uname -m) = "s390x" ] && exp='^[6-7]$'
+	;; "--per-core")	exp=8
+	;; "--per-socket")	exp=8
+	;; "--per-node")	exp=8
+	;; "--per-die")		exp=8
+	esac
+
+	while read line
+	do
+		# Check for lines beginning with Failed
+		x=${line:0:6}
+		[ "$x" = "Failed" ] && continue
+
+		# Count the number of commas
+		x=$(echo $line | tr -d -c ',')
+		cnt="${#x}"
+		# echo $line $cnt
+		[[ ! "$cnt" =~ $exp ]] && {
+			echo "wrong number of fields. expected $exp in $line" 1>&2
+			exit 1;
+		}
+	done
+	return 0
+}
 
 # Return true if perf_event_paranoid is > $1 and not running as root.
 function ParanoidAndNotRoot()
@@ -30,7 +52,7 @@ function ParanoidAndNotRoot()
 check_no_args()
 {
 	echo -n "Checking CSV output: no args "
-	perf stat -x, true 2>&1 | $PYTHON $pythonchecker --no-args
+	perf stat -x, true 2>&1 | commachecker --no-args
 	echo "[Success]"
 }
 
@@ -42,7 +64,7 @@ check_system_wide()
 		echo "[Skip] paranoid and not root"
 		return
 	fi
-	perf stat -x, -a true 2>&1 | $PYTHON $pythonchecker --system-wide
+	perf stat -x, -a true 2>&1 | commachecker --system-wide
 	echo "[Success]"
 }
 
@@ -55,14 +77,14 @@ check_system_wide_no_aggr()
 		return
 	fi
 	echo -n "Checking CSV output: system wide no aggregation "
-	perf stat -x, -A -a --no-merge true 2>&1 | $PYTHON $pythonchecker --system-wide-no-aggr
+	perf stat -x, -A -a --no-merge true 2>&1 | commachecker --system-wide-no-aggr
 	echo "[Success]"
 }
 
 check_interval()
 {
 	echo -n "Checking CSV output: interval "
-	perf stat -x, -I 1000 true 2>&1 | $PYTHON $pythonchecker --interval
+	perf stat -x, -I 1000 true 2>&1 | commachecker --interval
 	echo "[Success]"
 }
 
@@ -70,7 +92,7 @@ check_interval()
 check_event()
 {
 	echo -n "Checking CSV output: event "
-	perf stat -x, -e cpu-clock true 2>&1 | $PYTHON $pythonchecker --event
+	perf stat -x, -e cpu-clock true 2>&1 | commachecker --event
 	echo "[Success]"
 }
 
@@ -82,7 +104,7 @@ check_per_core()
 		echo "[Skip] paranoid and not root"
 		return
 	fi
-	perf stat -x, --per-core -a true 2>&1 | $PYTHON $pythonchecker --per-core
+	perf stat -x, --per-core -a true 2>&1 | commachecker --per-core
 	echo "[Success]"
 }
 
@@ -94,7 +116,7 @@ check_per_thread()
 		echo "[Skip] paranoid and not root"
 		return
 	fi
-	perf stat -x, --per-thread -a true 2>&1 | $PYTHON $pythonchecker --per-thread
+	perf stat -x, --per-thread -a true 2>&1 | commachecker --per-thread
 	echo "[Success]"
 }
 
@@ -106,7 +128,7 @@ check_per_die()
 		echo "[Skip] paranoid and not root"
 		return
 	fi
-	perf stat -x, --per-die -a true 2>&1 | $PYTHON $pythonchecker --per-die
+	perf stat -x, --per-die -a true 2>&1 | commachecker --per-die
 	echo "[Success]"
 }
 
@@ -118,7 +140,7 @@ check_per_node()
 		echo "[Skip] paranoid and not root"
 		return
 	fi
-	perf stat -x, --per-node -a true 2>&1 | $PYTHON $pythonchecker --per-node
+	perf stat -x, --per-node -a true 2>&1 | commachecker --per-node
 	echo "[Success]"
 }
 
@@ -130,7 +152,7 @@ check_per_socket()
 		echo "[Skip] paranoid and not root"
 		return
 	fi
-	perf stat -x, --per-socket -a true 2>&1 | $PYTHON $pythonchecker --per-socket
+	perf stat -x, --per-socket -a true 2>&1 | commachecker --per-socket
 	echo "[Success]"
 }
 
diff --git a/tools/perf/tests/shell/stat_all_metrics.sh b/tools/perf/tests/shell/stat_all_metrics.sh
index e7c59e5a7a98..6e79349e42be 100755
--- a/tools/perf/tests/shell/stat_all_metrics.sh
+++ b/tools/perf/tests/shell/stat_all_metrics.sh
@@ -1,26 +1,41 @@
-#!/bin/sh
+#!/bin/bash
 # perf all metrics test
 # SPDX-License-Identifier: GPL-2.0
 
-set -e
-
 err=0
 for m in $(perf list --raw-dump metrics); do
   echo "Testing $m"
   result=$(perf stat -M "$m" true 2>&1)
-  if [[ ! "$result" =~ "$m" ]] && [[ ! "$result" =~ "<not supported>" ]]; then
-    # We failed to see the metric and the events are support. Possibly the
-    # workload was too small so retry with something longer.
-    result=$(perf stat -M "$m" perf bench internals synthesize 2>&1)
-    if [[ ! "$result" =~ "$m" ]]; then
-      echo "Metric '$m' not printed in:"
-      echo "$result"
-      if [[ "$result" =~ "FP_ARITH" && "$err" != "1" ]]; then
-        echo "Skip, not fail, for FP issues"
-        err=2
-      else
-        err=1
-      fi
+  if [[ "$result" =~ "${m:0:50}" ]] || [[ "$result" =~ "<not supported>" ]]
+  then
+    continue
+  fi
+  # Failed so try system wide.
+  result=$(perf stat -M "$m" -a true 2>&1)
+  if [[ "$result" =~ "${m:0:50}" ]]
+  then
+    continue
+  fi
+  # Failed again, possibly the workload was too small so retry with something
+  # longer.
+  result=$(perf stat -M "$m" perf bench internals synthesize 2>&1)
+  if [[ "$result" =~ "${m:0:50}" ]]
+  then
+    continue
+  fi
+  echo "Metric '$m' not printed in:"
+  echo "$result"
+  if [[ "$err" != "1" ]]
+  then
+    err=2
+    if [[ "$result" =~ "FP_ARITH" || "$result" =~ "AMX" ]]
+    then
+      echo "Skip, not fail, for FP issues"
+    elif [[ "$result" =~ "PMM" ]]
+    then
+      echo "Skip, not fail, for Optane memory issues"
+    else
+      err=1
     fi
   fi
 done
diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
index 6ffbb27afaba..ec108d45d3c6 100755
--- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh
+++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
@@ -43,7 +43,7 @@ CFLAGS="-g -O0 -fno-inline -fno-omit-frame-pointer"
 cc $CFLAGS $TEST_PROGRAM_SOURCE -o $TEST_PROGRAM || exit 1
 
 # Add a 1 second delay to skip samples that are not in the leaf() function
-perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 -- $TEST_PROGRAM 2> /dev/null &
+perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 --user-callchains -- $TEST_PROGRAM 2> /dev/null &
 PID=$!
 
 echo " + Recording (PID=$PID)..."
diff --git a/tools/perf/tests/shell/test_arm_spe.sh b/tools/perf/tests/shell/test_arm_spe.sh
index e59044edc406..0d47479adba8 100755
--- a/tools/perf/tests/shell/test_arm_spe.sh
+++ b/tools/perf/tests/shell/test_arm_spe.sh
@@ -23,17 +23,20 @@ glb_err=0
 cleanup_files()
 {
 	rm -f ${perfdata}
+	rm -f ${perfdata}.old
 	exit $glb_err
 }
 
 trap cleanup_files exit term int
 
 arm_spe_report() {
-	if [ $2 != 0 ]; then
+	if [ $2 = 0 ]; then
+		echo "$1: PASS"
+	elif [ $2 = 2 ]; then
+		echo "$1: SKIPPED"
+	else
 		echo "$1: FAIL"
 		glb_err=$2
-	else
-		echo "$1: PASS"
 	fi
 }
 
@@ -85,5 +88,26 @@ arm_spe_snapshot_test() {
 	arm_spe_report "SPE snapshot testing" $err
 }
 
+arm_spe_system_wide_test() {
+	echo "Recording trace with system-wide mode $perfdata"
+
+	perf record -o - -e dummy -a -B true > /dev/null 2>&1
+	if [ $? != 0 ]; then
+		arm_spe_report "SPE system-wide testing" 2
+		return
+	fi
+
+	perf record -o ${perfdata} -e arm_spe// -a --no-bpf-event \
+		-- dd if=/dev/zero of=/dev/null count=100000 > /dev/null 2>&1
+
+	perf_script_samples dd &&
+	perf_report_samples dd
+
+	err=$?
+	arm_spe_report "SPE system-wide testing" $err
+}
+
 arm_spe_snapshot_test
+arm_spe_system_wide_test
+
 exit $glb_err
diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh
new file mode 100755
index 000000000000..c644f94a6500
--- /dev/null
+++ b/tools/perf/tests/shell/test_brstack.sh
@@ -0,0 +1,114 @@
+#!/bin/sh
+# Check branch stack sampling
+
+# SPDX-License-Identifier: GPL-2.0
+# German Gomez <[email protected]>, 2022
+
+# we need a C compiler to build the test programs
+# so bail if none is found
+if ! [ -x "$(command -v cc)" ]; then
+	echo "failed: no compiler, install gcc"
+	exit 2
+fi
+
+# skip the test if the hardware doesn't support branch stack sampling
+perf record -b -o- -B true > /dev/null 2>&1 || exit 2
+
+TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX)
+
+cleanup() {
+	rm -rf $TMPDIR
+}
+
+trap cleanup exit term int
+
+gen_test_program() {
+	# generate test program
+	cat << EOF > $1
+#define BENCH_RUNS 999999
+int cnt;
+void bar(void) {
+}			/* return */
+void foo(void) {
+	bar();		/* call */
+}			/* return */
+void bench(void) {
+  void (*foo_ind)(void) = foo;
+  if ((cnt++) % 3)	/* branch (cond) */
+    foo();		/* call */
+  bar();		/* call */
+  foo_ind();		/* call (ind) */
+}
+int main(void)
+{
+  int cnt = 0;
+  while (1) {
+    if ((cnt++) > BENCH_RUNS)
+      break;
+    bench();		/* call */
+  }			/* branch (uncond) */
+  return 0;
+}
+EOF
+}
+
+test_user_branches() {
+	echo "Testing user branch stack sampling"
+
+	gen_test_program "$TEMPDIR/program.c"
+	cc -fno-inline -g "$TEMPDIR/program.c" -o $TMPDIR/a.out
+
+	perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- $TMPDIR/a.out > /dev/null 2>&1
+	perf script -i $TMPDIR/perf.data --fields brstacksym | xargs -n1 > $TMPDIR/perf.script
+
+	# example of branch entries:
+	# 	foo+0x14/bar+0x40/P/-/-/0/CALL
+
+	set -x
+	egrep -m1 "^bench\+[^ ]*/foo\+[^ ]*/IND_CALL$"	$TMPDIR/perf.script
+	egrep -m1 "^foo\+[^ ]*/bar\+[^ ]*/CALL$"	$TMPDIR/perf.script
+	egrep -m1 "^bench\+[^ ]*/foo\+[^ ]*/CALL$"	$TMPDIR/perf.script
+	egrep -m1 "^bench\+[^ ]*/bar\+[^ ]*/CALL$"	$TMPDIR/perf.script
+	egrep -m1 "^bar\+[^ ]*/foo\+[^ ]*/RET$"		$TMPDIR/perf.script
+	egrep -m1 "^foo\+[^ ]*/bench\+[^ ]*/RET$"	$TMPDIR/perf.script
+	egrep -m1 "^bench\+[^ ]*/bench\+[^ ]*/COND$"	$TMPDIR/perf.script
+	egrep -m1 "^main\+[^ ]*/main\+[^ ]*/UNCOND$"	$TMPDIR/perf.script
+	set +x
+
+	# some branch types are still not being tested:
+	# IND COND_CALL COND_RET SYSCALL SYSRET IRQ SERROR NO_TX
+}
+
+# first argument <arg0> is the argument passed to "--branch-stack <arg0>,save_type,u"
+# second argument are the expected branch types for the given filter
+test_filter() {
+	local filter=$1
+	local expect=$2
+
+	echo "Testing branch stack filtering permutation ($filter,$expect)"
+
+	gen_test_program "$TEMPDIR/program.c"
+	cc -fno-inline -g "$TEMPDIR/program.c" -o $TMPDIR/a.out
+
+	perf record -o $TMPDIR/perf.data --branch-filter $filter,save_type,u -- $TMPDIR/a.out > /dev/null 2>&1
+	perf script -i $TMPDIR/perf.data --fields brstack | xargs -n1 > $TMPDIR/perf.script
+
+	# fail if we find any branch type that doesn't match any of the expected ones
+	# also consider UNKNOWN branch types (-)
+	if egrep -vm1 "^[^ ]*/($expect|-|( *))$" $TMPDIR/perf.script; then
+		return 1
+	fi
+}
+
+set -e
+
+test_user_branches
+
+test_filter "any_call"	"CALL|IND_CALL|COND_CALL|SYSCALL|IRQ"
+test_filter "call"	"CALL|SYSCALL"
+test_filter "cond"	"COND"
+test_filter "any_ret"	"RET|COND_RET|SYSRET|ERET"
+
+test_filter "call,cond"		"CALL|SYSCALL|COND"
+test_filter "any_call,cond"		"CALL|IND_CALL|COND_CALL|IRQ|SYSCALL|COND"
+test_filter "cond,any_call,any_ret"	"COND|CALL|IND_CALL|COND_CALL|SYSCALL|IRQ|RET|COND_RET|SYSRET|ERET"
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index d23a9e322ff5..0b4f61b6cc6b 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -115,7 +115,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
 	 * physical_package_id will be set to -1. Hence skip this
 	 * test if physical_package_id returns -1 for cpu from perf_cpu_map.
 	 */
-	if (strncmp(session->header.env.arch, "powerpc", 7)) {
+	if (!strncmp(session->header.env.arch, "ppc64le", 7)) {
 		if (cpu__get_socket_id(perf_cpu_map__cpu(map, 0)) == -1)
 			return TEST_SKIP;
 	}