aboutsummaryrefslogtreecommitdiff
path: root/tools/perf/bench/futex-lock-pi.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-01-30 11:15:14 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-01-30 11:15:14 -0800
commitd8b91dde38f4c43bd0bbbf17a90f735b16aaff2c (patch)
treebd72dabf6e4b23e060fce429c87e60504f69de54 /tools/perf/bench/futex-lock-pi.c
parent5e7481a25e90b661d1dbbba18be3fd3dfe12ec6f (diff)
parente4c1091cb495d9cbec8956d642644a71a1689958 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "Kernel side changes: - Clean up the x86 instruction decoder (Masami Hiramatsu) - Add new uprobes optimization for PUSH instructions on x86 (Yonghong Song) - Add MSR_IA32_THERM_STATUS to the MSR events (Stephane Eranian) - Fix misc bugs, update documentation, plus various cleanups (Jiri Olsa) There's a large number of tooling side improvements: - Intel-PT/BTS improvements (Adrian Hunter) - Numerous 'perf trace' improvements (Arnaldo Carvalho de Melo) - Introduce an errno code to string facility (Hendrik Brueckner) - Various build system improvements (Jiri Olsa) - Add support for CoreSight trace decoding by making the perf tools use the external openCSD (Mathieu Poirier, Tor Jeremiassen) - Add ARM Statistical Profiling Extensions (SPE) support (Kim Phillips) - libtraceevent updates (Steven Rostedt) - Intel vendor event JSON updates (Andi Kleen) - Introduce 'perf report --mmaps' and 'perf report --tasks' to show info present in 'perf.data' (Jiri Olsa, Arnaldo Carvalho de Melo) - Add infrastructure to record first and last sample time to the perf.data file header, so that when processing all samples in a 'perf record' session, such as when doing build-id processing, or when specifically requesting that that info be recorded, use that in 'perf report --time', that also got support for percent slices in addition to absolute ones. I.e. now it is possible to ask for the samples in the 10%-20% time slice of a perf.data file (Jin Yao) - Allow system wide 'perf stat --per-thread', sorting the result (Jin Yao) E.g.: [root@jouet ~]# perf stat --per-thread --metrics IPC ^C Performance counter stats for 'system wide': make-22229 23,012,094,032 inst_retired.any # 0.8 IPC cc1-22419 692,027,497 inst_retired.any # 0.8 IPC gcc-22418 328,231,855 inst_retired.any # 0.9 IPC cc1-22509 220,853,647 inst_retired.any # 0.8 IPC gcc-22486 199,874,810 inst_retired.any # 1.0 IPC as-22466 177,896,365 inst_retired.any # 0.9 IPC cc1-22465 150,732,374 inst_retired.any # 0.8 IPC gcc-22508 112,555,593 inst_retired.any # 0.9 IPC cc1-22487 108,964,079 inst_retired.any # 0.7 IPC qemu-system-x86-2697 21,330,550 inst_retired.any # 0.3 IPC systemd-journal-551 20,642,951 inst_retired.any # 0.4 IPC docker-containe-17651 9,552,892 inst_retired.any # 0.5 IPC dockerd-current-9809 7,528,586 inst_retired.any # 0.5 IPC make-22153 12,504,194,380 inst_retired.any # 0.8 IPC python2-22429 12,081,290,954 inst_retired.any # 0.8 IPC <SNIP> python2-22429 15,026,328,103 cpu_clk_unhalted.thread cc1-22419 826,660,193 cpu_clk_unhalted.thread gcc-22418 365,321,295 cpu_clk_unhalted.thread cc1-22509 279,169,362 cpu_clk_unhalted.thread gcc-22486 210,156,950 cpu_clk_unhalted.thread <SNIP> 5.638075538 seconds time elapsed [root@jouet ~]# - Improve shell auto-completion of perf events (Jin Yao) - 'perf probe' improvements (Masami Hiramatsu) - Improve PMU infrastructure to support amp64's ThunderX2 implementation defined core events (Ganapatrao Kulkarni) - Various annotation related improvements and fixes (Thomas Richter) - Clarify usage of 'overwrite' and 'backward' in the evlist/mmap code, removing the 'overwrite' parameter from several functions as it was always used it as 'false' (Wang Nan) - Fix/improve 'perf record' reverse recording support (Wang Nan) - Improve command line options documentation (Sihyeon Jang) - Optimize sample parsing for ordering events, where we don't need to parse all the PERF_SAMPLE_ bits, just the ones leading to the timestamp needed to reorder events (Jiri Olsa) - Generalize the annotation code to support other source information besides objdump/DWARF obtained ones, starting with python scripts, that will is slated to be merged soon (Jiri Olsa) - ... and a lot more that I failed to list, see the shortlog and changelog for details" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (262 commits) perf trace beauty flock: Move to separate object file perf evlist: Remove fcntl.h from evlist.h perf trace beauty futex: Beautify FUTEX_BITSET_MATCH_ANY perf trace: Do not print from time delta for interrupted syscall lines perf trace: Add --print-sample perf bpf: Remove misplaced __maybe_unused attribute MAINTAINERS: Adding entry for CoreSight trace decoding perf tools: Add mechanic to synthesise CoreSight trace packets perf tools: Add full support for CoreSight trace decoding pert tools: Add queue management functionality perf tools: Add functionality to communicate with the openCSD decoder perf tools: Add support for decoding CoreSight trace data perf tools: Add decoder mechanic to support dumping trace data perf tools: Add processing of coresight metadata perf tools: Add initial entry point for decoder CoreSight traces perf tools: Integrating the CoreSight decoding library perf vendor events intel: Update IvyTown files to V20 perf vendor events intel: Update IvyBridge files to V20 perf vendor events intel: Update BroadwellDE events to V7 perf vendor events intel: Update SkylakeX events to V1.06 ...
Diffstat (limited to 'tools/perf/bench/futex-lock-pi.c')
-rw-r--r--tools/perf/bench/futex-lock-pi.c23
1 files changed, 14 insertions, 9 deletions
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 08653ae8a8c4..8e9c4753e304 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -15,6 +15,7 @@
#include <errno.h>
#include "bench.h"
#include "futex.h"
+#include "cpumap.h"
#include <err.h>
#include <stdlib.h>
@@ -32,7 +33,7 @@ static struct worker *worker;
static unsigned int nsecs = 10;
static bool silent = false, multi = false;
static bool done = false, fshared = false;
-static unsigned int ncpus, nthreads = 0;
+static unsigned int nthreads = 0;
static int futex_flag = 0;
struct timeval start, end, runtime;
static pthread_mutex_t thread_lock;
@@ -113,9 +114,10 @@ static void *workerfn(void *arg)
return NULL;
}
-static void create_threads(struct worker *w, pthread_attr_t thread_attr)
+static void create_threads(struct worker *w, pthread_attr_t thread_attr,
+ struct cpu_map *cpu)
{
- cpu_set_t cpu;
+ cpu_set_t cpuset;
unsigned int i;
threads_starting = nthreads;
@@ -130,10 +132,10 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr)
} else
worker[i].futex = &global_futex;
- CPU_ZERO(&cpu);
- CPU_SET(i % ncpus, &cpu);
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
- if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
@@ -147,19 +149,22 @@ int bench_futex_lock_pi(int argc, const char **argv)
unsigned int i;
struct sigaction act;
pthread_attr_t thread_attr;
+ struct cpu_map *cpu;
argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
if (argc)
goto err;
- ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ err(EXIT_FAILURE, "calloc");
sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);
if (!nthreads)
- nthreads = ncpus;
+ nthreads = cpu->nr;
worker = calloc(nthreads, sizeof(*worker));
if (!worker)
@@ -180,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
pthread_attr_init(&thread_attr);
gettimeofday(&start, NULL);
- create_threads(worker, thread_attr);
+ create_threads(worker, thread_attr, cpu);
pthread_attr_destroy(&thread_attr);
pthread_mutex_lock(&thread_lock);