From baa51277cf5dc844089ea2f6e0f78b1c5ca665d8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 5 Apr 2016 17:40:52 -0700 Subject: libnvdimm, test: add mock SMART data payload Provide simulated SMART data to enable the ndctl implementation of SMART data retrieval and parsing. The payload is defined here, "Section 4.1 SMART and Health Info (Function Index 1)": http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 44 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 3187322eeed7..d1c98d4386d4 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -330,6 +330,42 @@ static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err, return 0; } +static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len) +{ + static const struct nd_smart_payload smart_data = { + .flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID + | ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID + | ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID, + .health = ND_SMART_NON_CRITICAL_HEALTH, + .temperature = 23 * 16, + .spares = 75, + .alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, + .life_used = 5, + .shutdown_state = 0, + .vendor_size = 0, + }; + + if (buf_len < sizeof(*smart)) + return -EINVAL; + memcpy(smart->data, &smart_data, sizeof(smart_data)); + return 0; +} + +static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t, + unsigned int buf_len) +{ + static const struct nd_smart_threshold_payload smart_t_data = { + .alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, + .temperature = 40 * 16, + .spares = 5, + }; + + if (buf_len < sizeof(*smart_t)) + return -EINVAL; + memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data)); + return 0; +} + static int nfit_test_ctl(struct 
nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) @@ -368,6 +404,12 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nfit_test_cmd_set_config_data(buf, buf_len, t->label[i]); break; + case ND_CMD_SMART: + rc = nfit_test_cmd_smart(buf, buf_len); + break; + case ND_CMD_SMART_THRESHOLD: + rc = nfit_test_cmd_smart_threshold(buf, buf_len); + break; default: return -ENOTTY; } @@ -1254,10 +1296,12 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); + set_bit(ND_CMD_SMART, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_dsm_force_en); } static void nfit_test1_setup(struct nfit_test *t) -- cgit From cfa0963dc474fd098d6a7a722dcecfc143a4b377 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Mar 2016 12:55:00 -0600 Subject: kselftests/ftrace : Add event trigger testcases This adds simple event trigger testcases for ftracetest, which cover the following triggers: - traceon-traceoff trigger - enable/disable_event trigger - snapshot trigger - stacktrace trigger - trigger filters Here is the test result. 
---- # ./ftracetest test.d/trigger/ === Ftrace unit tests === [1] event trigger - test event enable/disable trigger [PASS] [2] event trigger - test trigger filter [PASS] [3] event trigger - test snapshot-trigger [PASS] [4] event trigger - test stacktrace-trigger [PASS] [5] event trigger - test traceon/off trigger [PASS] # of passed: 5 # of failed: 0 # of unresolved: 0 # of untested: 0 # of unsupported: 0 # of xfailed: 0 # of undefined(test bug): 0 ---- Link: http://lkml.kernel.org/r/12b9c2b289a0dc1e4386e7b77674611a83abca85.1457029949.git.tom.zanussi@linux.intel.com Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Shuah Khan Cc: Namhyung Kim Cc: Tom Zanussi Reviewed-by: Namhyung Kim Signed-off-by: Steven Rostedt --- tools/testing/selftests/ftrace/test.d/functions | 9 +++ .../ftrace/test.d/trigger/trigger-eventonoff.tc | 64 ++++++++++++++++++++++ .../ftrace/test.d/trigger/trigger-filter.tc | 59 ++++++++++++++++++++ .../ftrace/test.d/trigger/trigger-snapshot.tc | 56 +++++++++++++++++++ .../ftrace/test.d/trigger/trigger-stacktrace.tc | 53 ++++++++++++++++++ .../ftrace/test.d/trigger/trigger-traceonoff.tc | 58 ++++++++++++++++++++ 6 files changed, 299 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc (limited to 'tools/testing') diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 5d8cd06d920f..c37262f6c269 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -14,3 +14,12 @@ enable_tracing() { # start trace recording reset_tracer() { # reset the current 
tracer echo nop > current_tracer } + +reset_trigger() { # reset all current setting triggers + grep -v ^# events/*/*/trigger | + while read line; do + cmd=`echo $line | cut -f2- -d: | cut -f1 -d" "` + echo "!$cmd" > `echo $line | cut -f1 -d:` + done +} + diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc new file mode 100644 index 000000000000..1a9445021bf1 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc @@ -0,0 +1,64 @@ +#!/bin/sh +# description: event trigger - test event enable/disable trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep enable_event events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "event enable/disable trigger is not supported" + exit_unsupported +fi + +echo "Test enable_event trigger" +echo 0 > events/sched/sched_switch/enable +echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat events/sched/sched_switch/enable` != '1*' ]; then + fail "enable_event trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test disable_event trigger" +echo 1 > events/sched/sched_switch/enable +echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat events/sched/sched_switch/enable` != '0*' ]; then + fail "disable_event trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test semantic error for event enable/disable trigger" +! 
echo 'enable_event:nogroup:noevent' > events/sched/sched_process_fork/trigger +! echo 'disable_event+1' > events/sched/sched_process_fork/trigger +echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger +! echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger +! echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc new file mode 100644 index 000000000000..514e466e198b --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc @@ -0,0 +1,59 @@ +#!/bin/sh +# description: event trigger - test trigger filter + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test trigger filter" +echo 1 > tracing_on +echo 'traceoff if child_pid == 0' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat tracing_on` -ne 1 ]; then + fail "traceoff trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test semantic error for trigger filter" +! echo 'traceoff if a' > events/sched/sched_process_fork/trigger +! echo 'traceoff if common_pid=0' > events/sched/sched_process_fork/trigger +! echo 'traceoff if common_pid==b' > events/sched/sched_process_fork/trigger +echo 'traceoff if common_pid == 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger +! 
echo 'traceoff if common_pid == child_pid' > events/sched/sched_process_fork/trigger +echo 'traceoff if common_pid <= 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger +echo 'traceoff if common_pid >= 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger +echo 'traceoff if parent_pid >= 0 && child_pid >= 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger +echo 'traceoff if parent_pid >= 0 || child_pid >= 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger + + + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc new file mode 100644 index 000000000000..f84b80d551a2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc @@ -0,0 +1,56 @@ +#!/bin/sh +# description: event trigger - test snapshot-trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! 
-f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep snapshot events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "snapshot trigger is not supported" + exit_unsupported +fi + +echo "Test snapshot tigger" +echo 0 > snapshot +echo 1 > events/sched/sched_process_fork/enable +( echo "forked") +echo 'snapshot:1' > events/sched/sched_process_fork/trigger +( echo "forked") +grep sched_process_fork snapshot > /dev/null || \ + fail "snapshot trigger on sched_process_fork did not work" + +reset_trigger +echo 0 > snapshot +echo 0 > events/sched/sched_process_fork/enable + +echo "Test snapshot semantic errors" + +! echo "snapshot+1" > events/sched/sched_process_fork/trigger +echo "snapshot" > events/sched/sched_process_fork/trigger +! echo "snapshot" > events/sched/sched_process_fork/trigger + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc new file mode 100644 index 000000000000..9fa23b085def --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc @@ -0,0 +1,53 @@ +#!/bin/sh +# description: event trigger - test stacktrace-trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! 
-f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep stacktrace events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "stacktrace trigger is not supported" + exit_unsupported +fi + +echo "Test stacktrace tigger" +echo 0 > trace +echo 0 > options/stacktrace +echo 'stacktrace' > events/sched/sched_process_fork/trigger +( echo "forked") +grep "" trace > /dev/null || \ + fail "stacktrace trigger on sched_process_fork did not work" + +reset_trigger + +echo "Test stacktrace semantic errors" + +! echo "stacktrace:foo" > events/sched/sched_process_fork/trigger +echo "stacktrace" > events/sched/sched_process_fork/trigger +! echo "stacktrace" > events/sched/sched_process_fork/trigger + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc new file mode 100644 index 000000000000..87648e5f987c --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc @@ -0,0 +1,58 @@ +#!/bin/sh +# description: event trigger - test traceon/off trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! 
-f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test traceoff trigger" +echo 1 > tracing_on +echo 'traceoff' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat tracing_on` -ne 0 ]; then + fail "traceoff trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test traceon trigger" +echo 0 > tracing_on +echo 'traceon' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat tracing_on` -ne 1 ]; then + fail "traceoff trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test semantic error for traceoff/on trigger" +! echo 'traceoff:badparam' > events/sched/sched_process_fork/trigger +! echo 'traceoff+0' > events/sched/sched_process_fork/trigger +echo 'traceon' > events/sched/sched_process_fork/trigger +! echo 'traceon' > events/sched/sched_process_fork/trigger +! echo 'traceoff' > events/sched/sched_process_fork/trigger + +do_reset + +exit 0 -- cgit From 76929ab51f0ee398bcf72286c4b377051f07a31b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Mar 2016 12:55:01 -0600 Subject: kselftests/ftrace: Add hist trigger testcases Add the hist trigger testcases for ftracetest. This checks the basic histogram trigger behaviors, such as: - Histogram trigger itself - Histogram with string key - Histogram with compound keys - Histogram with sort key - Histogram trigger modifiers (execname, hex, syscall) - Multiple histograms on an event - Named histogram - Named histogram on multi events Here is the test result. 
---- # ./ftracetest test.d/trigger/*hist*.tc === Ftrace unit tests === [1] event trigger - test histogram modifiers [PASS] [2] event trigger - test histogram trigger [PASS] [3] event trigger - test multiple histogram triggers [PASS] # of passed: 3 # of failed: 0 # of unresolved: 0 # of untested: 0 # of unsupported: 0 # of xfailed: 0 # of undefined(test bug): 0 ---- Link: http://lkml.kernel.org/r/17cb3a3d9eeadc3282645147905455a298e7fbeb.1457029949.git.tom.zanussi@linux.intel.com Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Shuah Khan Cc: Namhyung Kim Cc: Tom Zanussi Signed-off-by: Tom Zanussi [Tom Zanussi: Change multihist test from truncate ('>') to append ('>>')] Reviewed-by: Namhyung Kim Signed-off-by: Steven Rostedt --- .../ftrace/test.d/trigger/trigger-hist-mod.tc | 65 +++++++++++++++++ .../ftrace/test.d/trigger/trigger-hist.tc | 83 ++++++++++++++++++++++ .../ftrace/test.d/trigger/trigger-multihist.tc | 73 +++++++++++++++++++ 3 files changed, 221 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc (limited to 'tools/testing') diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc new file mode 100644 index 000000000000..57e350a0bcca --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -0,0 +1,65 @@ +#!/bin/sh +# description: event trigger - test histogram modifiers + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! 
-f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep hist events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +echo "Test histogram with execname modifier" + +echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +COMM=`cat /proc/$$/comm` +grep "common_pid: $COMM" events/sched/sched_process_fork/hist > /dev/null || \ + fail "execname modifier on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with hex modifier" + +echo 'hist:keys=parent_pid.hex' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +# Note that $$ is the parent pid. $PID is current PID. +HEX=`printf %x $PID` +grep "parent_pid: $HEX" events/sched/sched_process_fork/hist > /dev/null || \ + fail "hex modifier on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with syscall modifier" + +echo 'hist:keys=id.syscall' > events/raw_syscalls/sys_exit/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep "id: sys_" events/raw_syscalls/sys_exit/hist > /dev/null || \ + fail "syscall modifier on raw_syscalls/sys_exit did not work" + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc new file mode 100644 index 000000000000..b2902d42a537 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc @@ -0,0 +1,83 @@ +#!/bin/sh +# description: event trigger - test histogram trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! 
-d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep hist events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +echo "Test histogram basic tigger" + +echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep parent_pid events/sched/sched_process_fork/hist > /dev/null || \ + fail "hist trigger on sched_process_fork did not work" +grep child events/sched/sched_process_fork/hist > /dev/null || \ + fail "hist trigger on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with compound keys" + +echo 'hist:keys=parent_pid,child_pid' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep '^{ parent_pid:.*, child_pid:.*}' events/sched/sched_process_fork/hist > /dev/null || \ + fail "compound keys on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with string key" + +echo 'hist:keys=parent_comm' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +COMM=`cat /proc/$$/comm` +grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \ + fail "string key on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with sort key" + +echo 'hist:keys=parent_pid,child_pid:sort=child_pid.ascending' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done + +check_inc() { + while [ $# -gt 1 ]; do + [ $1 -gt $2 ] && return 1 + shift 1 + done + return 0 +} +check_inc `grep -o "child_pid:[[:space:]]*[[:digit:]]*" \ + events/sched/sched_process_fork/hist | cut -d: -f2 ` || + fail "sort 
param on sched_process_fork did not work" + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc new file mode 100644 index 000000000000..03c4a46561fc --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc @@ -0,0 +1,73 @@ +#!/bin/sh +# description: event trigger - test multiple histogram triggers + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep hist events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +reset_trigger + +echo "Test histogram multiple tiggers" + +echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger +echo 'hist:keys=parent_comm:vals=child_pid' >> events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep parent_pid events/sched/sched_process_fork/hist > /dev/null || \ + fail "hist trigger on sched_process_fork did not work" +grep child events/sched/sched_process_fork/hist > /dev/null || \ + fail "hist trigger on sched_process_fork did not work" +COMM=`cat /proc/$$/comm` +grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \ + fail "string key on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with its name" + +echo 'hist:name=test_hist:keys=common_pid' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep test_hist events/sched/sched_process_fork/hist > /dev/null || \ + fail "named event on 
sched_process_fork did not work" + +echo "Test same named histogram on different events" + +echo 'hist:name=test_hist:keys=common_pid' > events/sched/sched_process_exit/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep test_hist events/sched/sched_process_exit/hist > /dev/null || \ + fail "named event on sched_process_fork did not work" + +diffs=`diff events/sched/sched_process_exit/hist events/sched/sched_process_fork/hist | wc -l` +test $diffs -eq 0 || fail "Same name histograms are not same" + +reset_trigger + +do_reset + +exit 0 -- cgit From 93c5f671f25cb7a73492b7a96b426c4fb1efa715 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Mar 2016 12:55:03 -0600 Subject: kselftests/ftrace: Add a test for log2 modifier of hist trigger Add a test for log2 modifier of hist trigger in hist_mod.tc. Here is the test result. ---- # ./ftracetest test.d/trigger/trigger-hist-mod.tc === Ftrace unit tests === [1] event trigger - test histogram modifiers [PASS] # of passed: 1 # of failed: 0 # of unresolved: 0 # of untested: 0 # of unsupported: 0 # of xfailed: 0 # of undefined(test bug): 0 ---- Link: http://lkml.kernel.org/r/3f1ab735c06a50b1b40d3e96b8b6a3e5ea62fd86.1457029949.git.tom.zanussi@linux.intel.com Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Shuah Khan Cc: Namhyung Kim Cc: Tom Zanussi Tested-by: Tom Zanussi Signed-off-by: Steven Rostedt --- .../selftests/ftrace/test.d/trigger/trigger-hist-mod.tc | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc index 57e350a0bcca..c2b61c4fda11 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -60,6 +60,16 @@ for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done grep "id: sys_" events/raw_syscalls/sys_exit/hist > 
/dev/null || \ fail "syscall modifier on raw_syscalls/sys_exit did not work" + +reset_trigger + +echo "Test histgram with log2 modifier" + +echo 'hist:keys=bytes_req.log2' > events/kmem/kmalloc/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep 'bytes_req: ~ 2^[0-9]*' events/kmem/kmalloc/hist > /dev/null || \ + fail "log2 modifier on kmem/kmalloc did not work" + do_reset exit 0 -- cgit From 200c79da824c978fcf6eec1dc9c0a1e521133267 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 22 Mar 2016 00:22:16 -0700 Subject: libnvdimm, pmem, pfn: make pmem_rw_bytes generic and refactor pfn setup In preparation for providing an alternative (to block device) access mechanism to persistent memory, convert pmem_rw_bytes() to nsio_rw_bytes(). This allows ->rw_bytes() functionality without requiring a 'struct pmem_device' to be instantiated. In other words, when ->rw_bytes() is in use i/o is driven through 'struct nd_namespace_io', otherwise it is driven through 'struct pmem_device' and the block layer. This consolidates the disjoint calls to devm_exit_badblocks() and devm_memunmap() into a common devm_nsio_disable() and cleans up the init path to use a unified pmem_attach_disk() implementation. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- drivers/nvdimm/blk.c | 2 +- drivers/nvdimm/btt_devs.c | 4 +- drivers/nvdimm/claim.c | 61 ++++++++++ drivers/nvdimm/nd.h | 40 +++++-- drivers/nvdimm/pfn_devs.c | 4 +- drivers/nvdimm/pmem.c | 236 ++++++++++++++------------------------ include/linux/nd.h | 9 +- tools/testing/nvdimm/Kbuild | 1 + tools/testing/nvdimm/test/iomap.c | 27 +++-- 9 files changed, 211 insertions(+), 173 deletions(-) (limited to 'tools/testing') diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 4c14ecdc792b..495e06d9f7e7 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -324,7 +324,7 @@ static int nd_blk_probe(struct device *dev) ndns->rw_bytes = nsblk_rw_bytes; if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); - else if (nd_btt_probe(dev, ndns, nsblk) == 0) { + else if (nd_btt_probe(dev, ndns) == 0) { /* we'll come back as btt-blk */ return -ENXIO; } else diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 1886171af80e..816d0dae6398 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -273,8 +273,7 @@ static int __nd_btt_probe(struct nd_btt *nd_btt, return 0; } -int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata) +int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns) { int rc; struct device *btt_dev; @@ -289,7 +288,6 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns, nvdimm_bus_unlock(&ndns->dev); if (!btt_dev) return -ENOMEM; - dev_set_drvdata(btt_dev, drvdata); btt_sb = devm_kzalloc(dev, sizeof(*btt_sb), GFP_KERNEL); rc = __nd_btt_probe(to_nd_btt(btt_dev), ndns, btt_sb); dev_dbg(dev, "%s: btt: %s\n", __func__, diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index e8f03b0e95e4..6bbd0a36994a 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -12,6 +12,7 @@ */ #include #include +#include #include "nd-core.h" #include "pfn.h" #include "btt.h" @@ 
-199,3 +200,63 @@ u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb) return sum; } EXPORT_SYMBOL(nd_sb_checksum); + +static int nsio_rw_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size, int rw) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + + if (unlikely(offset + size > nsio->size)) { + dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); + return -EFAULT; + } + + if (rw == READ) { + unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512); + + if (unlikely(is_bad_pmem(&nsio->bb, offset / 512, sz_align))) + return -EIO; + return memcpy_from_pmem(buf, nsio->addr + offset, size); + } else { + memcpy_to_pmem(nsio->addr + offset, buf, size); + wmb_pmem(); + } + + return 0; +} + +int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio) +{ + struct resource *res = &nsio->res; + struct nd_namespace_common *ndns = &nsio->common; + + nsio->size = resource_size(res); + if (!devm_request_mem_region(dev, res->start, resource_size(res), + dev_name(dev))) { + dev_warn(dev, "could not reserve region %pR\n", res); + return -EBUSY; + } + + ndns->rw_bytes = nsio_rw_bytes; + if (devm_init_badblocks(dev, &nsio->bb)) + return -ENOMEM; + nvdimm_badblocks_populate(to_nd_region(ndns->dev.parent), &nsio->bb, + &nsio->res); + + nsio->addr = devm_memremap(dev, res->start, resource_size(res), + ARCH_MEMREMAP_PMEM); + if (IS_ERR(nsio->addr)) + return PTR_ERR(nsio->addr); + return 0; +} +EXPORT_SYMBOL_GPL(devm_nsio_enable); + +void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio) +{ + struct resource *res = &nsio->res; + + devm_memunmap(dev, nsio->addr); + devm_exit_badblocks(dev, &nsio->bb); + devm_release_mem_region(dev, res->start, resource_size(res)); +} +EXPORT_SYMBOL_GPL(devm_nsio_disable); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 0fb14890ba26..10e23fe49012 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -13,6 +13,7 @@ #ifndef __ND_H__ #define __ND_H__ 
#include +#include #include #include #include @@ -197,13 +198,12 @@ struct nd_gen_sb { u64 nd_sb_checksum(struct nd_gen_sb *sb); #if IS_ENABLED(CONFIG_BTT) -int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata); +int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_btt(struct device *dev); struct device *nd_btt_create(struct nd_region *nd_region); #else static inline int nd_btt_probe(struct device *dev, - struct nd_namespace_common *ndns, void *drvdata) + struct nd_namespace_common *ndns) { return -ENODEV; } @@ -221,14 +221,13 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region) struct nd_pfn *to_nd_pfn(struct device *dev); #if IS_ENABLED(CONFIG_NVDIMM_PFN) -int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata); +int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_pfn(struct device *dev); struct device *nd_pfn_create(struct nd_region *nd_region); int nd_pfn_validate(struct nd_pfn *nd_pfn); #else -static inline int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata) +static inline int nd_pfn_probe(struct device *dev, + struct nd_namespace_common *ndns) { return -ENODEV; } @@ -272,6 +271,20 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name); void nvdimm_badblocks_populate(struct nd_region *nd_region, struct badblocks *bb, const struct resource *res); +#if IS_ENABLED(CONFIG_ND_CLAIM) +int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio); +void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio); +#else +static inline int devm_nsio_enable(struct device *dev, + struct nd_namespace_io *nsio) +{ + return -ENXIO; +} +static inline void devm_nsio_disable(struct device *dev, + struct nd_namespace_io *nsio) +{ +} +#endif int nd_blk_region_init(struct nd_region *nd_region); void __nd_iostat_start(struct bio *bio, unsigned long 
*start); static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) @@ -285,6 +298,19 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) return true; } void nd_iostat_end(struct bio *bio, unsigned long start); +static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector, + unsigned int len) +{ + if (bb->count) { + sector_t first_bad; + int num_bad; + + return !!badblocks_check(bb, sector, len / 512, &first_bad, + &num_bad); + } + + return false; +} resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); const u8 *nd_dev_to_uuid(struct device *dev); bool pmem_should_map_pages(struct device *dev); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 96aa5490c279..9df081ae96e3 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -410,8 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) } EXPORT_SYMBOL(nd_pfn_validate); -int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata) +int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) { int rc; struct nd_pfn *nd_pfn; @@ -427,7 +426,6 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, nvdimm_bus_unlock(&ndns->dev); if (!pfn_dev) return -ENOMEM; - dev_set_drvdata(pfn_dev, drvdata); pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); nd_pfn = to_nd_pfn(pfn_dev); nd_pfn->pfn_sb = pfn_sb; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 67d48e2e8ca2..b5f81b02205c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -49,19 +49,6 @@ struct pmem_device { struct badblocks bb; }; -static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) -{ - if (bb->count) { - sector_t first_bad; - int num_bad; - - return !!badblocks_check(bb, sector, len / 512, &first_bad, - &num_bad); - } - - return false; -} - static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, unsigned int len) { @@ 
-209,16 +196,40 @@ void pmem_release_disk(void *disk) put_disk(disk); } -static struct pmem_device *pmem_alloc(struct device *dev, - struct resource *res, int id) +static struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct resource *res, struct vmem_altmap *altmap); + +static int pmem_attach_disk(struct device *dev, + struct nd_namespace_common *ndns) { + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct vmem_altmap __altmap, *altmap = NULL; + struct resource *res = &nsio->res; + struct nd_pfn *nd_pfn = NULL; + int nid = dev_to_node(dev); + struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; + struct resource pfn_res; struct request_queue *q; + struct gendisk *disk; + void *addr; + + /* while nsio_rw_bytes is active, parse a pfn info block if present */ + if (is_nd_pfn(dev)) { + nd_pfn = to_nd_pfn(dev); + altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap); + if (IS_ERR(altmap)) + return PTR_ERR(altmap); + } + + /* we're attaching a block device, disable raw namespace access */ + devm_nsio_disable(dev, nsio); pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); if (!pmem) - return ERR_PTR(-ENOMEM); + return -ENOMEM; + dev_set_drvdata(dev, pmem); pmem->phys_addr = res->start; pmem->size = resource_size(res); if (!arch_has_wmb_pmem()) @@ -227,22 +238,31 @@ static struct pmem_device *pmem_alloc(struct device *dev, if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(dev))) { dev_warn(dev, "could not reserve region %pR\n", res); - return ERR_PTR(-EBUSY); + return -EBUSY; } q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); if (!q) - return ERR_PTR(-ENOMEM); + return -ENOMEM; + pmem->pmem_queue = q; pmem->pfn_flags = PFN_DEV; - if (pmem_should_map_pages(dev)) { - pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res, + if (is_nd_pfn(dev)) { + addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter, + altmap); + pfn_sb = nd_pfn->pfn_sb; + pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); 
+ pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res); + pmem->pfn_flags |= PFN_MAP; + res = &pfn_res; /* for badblocks populate */ + res->start += pmem->data_offset; + } else if (pmem_should_map_pages(dev)) { + addr = devm_memremap_pages(dev, &nsio->res, &q->q_usage_counter, NULL); pmem->pfn_flags |= PFN_MAP; } else - pmem->virt_addr = (void __pmem *) devm_memremap(dev, - pmem->phys_addr, pmem->size, - ARCH_MEMREMAP_PMEM); + addr = devm_memremap(dev, pmem->phys_addr, + pmem->size, ARCH_MEMREMAP_PMEM); /* * At release time the queue must be dead before @@ -250,23 +270,12 @@ static struct pmem_device *pmem_alloc(struct device *dev, */ if (devm_add_action(dev, pmem_release_queue, q)) { blk_cleanup_queue(q); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } - if (IS_ERR(pmem->virt_addr)) - return (void __force *) pmem->virt_addr; - - pmem->pmem_queue = q; - return pmem; -} - -static int pmem_attach_disk(struct device *dev, - struct nd_namespace_common *ndns, struct pmem_device *pmem) -{ - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - int nid = dev_to_node(dev); - struct resource bb_res; - struct gendisk *disk; + if (IS_ERR(addr)) + return PTR_ERR(addr); + pmem->virt_addr = (void __pmem *) addr; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE); @@ -291,20 +300,9 @@ static int pmem_attach_disk(struct device *dev, set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) / 512); pmem->pmem_disk = disk; - devm_exit_badblocks(dev, &pmem->bb); if (devm_init_badblocks(dev, &pmem->bb)) return -ENOMEM; - bb_res.start = nsio->res.start + pmem->data_offset; - bb_res.end = nsio->res.end; - if (is_nd_pfn(dev)) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); - struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; - - bb_res.start += __le32_to_cpu(pfn_sb->start_pad); - bb_res.end -= __le32_to_cpu(pfn_sb->end_trunc); - } - nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, - 
&bb_res); + nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res); disk->bb = &pmem->bb; add_disk(disk); revalidate_disk(disk); @@ -312,33 +310,8 @@ static int pmem_attach_disk(struct device *dev, return 0; } -static int pmem_rw_bytes(struct nd_namespace_common *ndns, - resource_size_t offset, void *buf, size_t size, int rw) -{ - struct pmem_device *pmem = dev_get_drvdata(ndns->claim); - - if (unlikely(offset + size > pmem->size)) { - dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); - return -EFAULT; - } - - if (rw == READ) { - unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512); - - if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align))) - return -EIO; - return memcpy_from_pmem(buf, pmem->virt_addr + offset, size); - } else { - memcpy_to_pmem(pmem->virt_addr + offset, buf, size); - wmb_pmem(); - } - - return 0; -} - static int nd_pfn_init(struct nd_pfn *nd_pfn) { - struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev); struct nd_namespace_common *ndns = nd_pfn->ndns; u32 start_pad = 0, end_trunc = 0; resource_size_t start, size; @@ -404,7 +377,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * ->direct_access() to those that are included in the memmap. 
*/ start += start_pad; - npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K; + size = resource_size(&nsio->res); + npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K; if (nd_pfn->mode == PFN_MODE_PMEM) offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align) - start; @@ -413,13 +387,13 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) else return -ENXIO; - if (offset + start_pad + end_trunc >= pmem->size) { + if (offset + start_pad + end_trunc >= size) { dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", dev_name(&ndns->dev)); return -ENXIO; } - npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K; + npfns = (size - offset - start_pad - end_trunc) / SZ_4K; pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); @@ -456,17 +430,14 @@ static unsigned long init_altmap_reserve(resource_size_t base) return reserve; } -static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) +static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct resource *res, struct vmem_altmap *altmap) { - struct resource res; - struct request_queue *q; - struct pmem_device *pmem; - struct vmem_altmap *altmap; - struct device *dev = &nd_pfn->dev; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; - struct nd_namespace_common *ndns = nd_pfn->ndns; + u64 offset = le64_to_cpu(pfn_sb->dataoff); u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t base = nsio->res.start + start_pad; struct vmem_altmap __altmap = { @@ -474,112 +445,75 @@ static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) .reserve = init_altmap_reserve(base), }; - pmem = dev_get_drvdata(dev); - pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); - pmem->pfn_pad = start_pad + end_trunc; + memcpy(res, &nsio->res, sizeof(*res)); + 
res->start += start_pad; + res->end -= end_trunc; + nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); if (nd_pfn->mode == PFN_MODE_RAM) { - if (pmem->data_offset < SZ_8K) - return -EINVAL; + if (offset < SZ_8K) + return ERR_PTR(-EINVAL); nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); altmap = NULL; } else if (nd_pfn->mode == PFN_MODE_PMEM) { - nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset) - / PAGE_SIZE; + nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE; if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) dev_info(&nd_pfn->dev, "number of pfns truncated from %lld to %ld\n", le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); - altmap = & __altmap; - altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K); + memcpy(altmap, &__altmap, sizeof(*altmap)); + altmap->free = PHYS_PFN(offset - SZ_8K); altmap->alloc = 0; } else - return -ENXIO; + return ERR_PTR(-ENXIO); - /* establish pfn range for lookup, and switch to direct map */ - q = pmem->pmem_queue; - memcpy(&res, &nsio->res, sizeof(res)); - res.start += start_pad; - res.end -= end_trunc; - devm_remove_action(dev, pmem_release_queue, q); - devm_memunmap(dev, (void __force *) pmem->virt_addr); - pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res, - &q->q_usage_counter, altmap); - pmem->pfn_flags |= PFN_MAP; - - /* - * At release time the queue must be dead before - * devm_memremap_pages is unwound - */ - if (devm_add_action(dev, pmem_release_queue, q)) { - blk_cleanup_queue(q); - return -ENOMEM; - } - if (IS_ERR(pmem->virt_addr)) - return PTR_ERR(pmem->virt_addr); - - /* attach pmem disk in "pfn-mode" */ - return pmem_attach_disk(dev, ndns, pmem); + return altmap; } -static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +/* + * Determine the effective resource range and vmem_altmap from an nd_pfn + * instance. 
+ */ +static struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct resource *res, struct vmem_altmap *altmap) { - struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); int rc; if (!nd_pfn->uuid || !nd_pfn->ndns) - return -ENODEV; + return ERR_PTR(-ENODEV); rc = nd_pfn_init(nd_pfn); if (rc) - return rc; + return ERR_PTR(rc); + /* we need a valid pfn_sb before we can init a vmem_altmap */ - return __nvdimm_namespace_attach_pfn(nd_pfn); + return __nvdimm_setup_pfn(nd_pfn, res, altmap); } static int nd_pmem_probe(struct device *dev) { - struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_namespace_common *ndns; - struct nd_namespace_io *nsio; - struct pmem_device *pmem; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) return PTR_ERR(ndns); - nsio = to_nd_namespace_io(&ndns->dev); - pmem = pmem_alloc(dev, &nsio->res, nd_region->id); - if (IS_ERR(pmem)) - return PTR_ERR(pmem); - - dev_set_drvdata(dev, pmem); - ndns->rw_bytes = pmem_rw_bytes; - if (devm_init_badblocks(dev, &pmem->bb)) - return -ENOMEM; - nvdimm_badblocks_populate(nd_region, &pmem->bb, &nsio->res); + if (devm_nsio_enable(dev, to_nd_namespace_io(&ndns->dev))) + return -ENXIO; - if (is_nd_btt(dev)) { - /* btt allocates its own request_queue */ - devm_remove_action(dev, pmem_release_queue, pmem->pmem_queue); - blk_cleanup_queue(pmem->pmem_queue); + if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); - } if (is_nd_pfn(dev)) - return nvdimm_namespace_attach_pfn(ndns); + return pmem_attach_disk(dev, ndns); - if (nd_btt_probe(dev, ndns, pmem) == 0 - || nd_pfn_probe(dev, ndns, pmem) == 0) { - /* - * We'll come back as either btt-pmem, or pfn-pmem, so - * drop the queue allocation for now. 
- */ + /* if we find a valid info-block we'll come back as that personality */ + if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0) return -ENXIO; - } - return pmem_attach_disk(dev, ndns, pmem); + /* ...otherwise we're just a raw pmem device */ + return pmem_attach_disk(dev, ndns); } static int nd_pmem_remove(struct device *dev) diff --git a/include/linux/nd.h b/include/linux/nd.h index 5ea4aec7fd63..aee2761d294c 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -15,6 +15,7 @@ #include #include #include +#include enum nvdimm_event { NVDIMM_REVALIDATE_POISON, @@ -55,13 +56,19 @@ static inline struct nd_namespace_common *to_ndns(struct device *dev) } /** - * struct nd_namespace_io - infrastructure for loading an nd_pmem instance + * struct nd_namespace_io - device representation of a persistent memory range * @dev: namespace device created by the nd region driver * @res: struct resource conversion of a NFIT SPA table + * @size: cached resource_size(@res) for fast path size checks + * @addr: virtual address to access the namespace range + * @bb: badblocks list for the namespace range */ struct nd_namespace_io { struct nd_namespace_common common; struct resource res; + resource_size_t size; + void __pmem *addr; + struct badblocks bb; }; /** diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index a34bfd0c8928..d5bc8c080b44 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -7,6 +7,7 @@ ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap ldflags-y += --wrap=memunmap ldflags-y += --wrap=__devm_request_region +ldflags-y += --wrap=__devm_release_region ldflags-y += --wrap=__request_region ldflags-y += --wrap=__release_region ldflags-y += --wrap=devm_memremap_pages diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 0c1a7e65bb81..c842095f2801 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -239,13 +239,11 @@ 
struct resource *__wrap___devm_request_region(struct device *dev, } EXPORT_SYMBOL(__wrap___devm_request_region); -void __wrap___release_region(struct resource *parent, resource_size_t start, - resource_size_t n) +static bool nfit_test_release_region(struct resource *parent, + resource_size_t start, resource_size_t n) { - struct nfit_test_resource *nfit_res; - if (parent == &iomem_resource) { - nfit_res = get_nfit_res(start); + struct nfit_test_resource *nfit_res = get_nfit_res(start); if (nfit_res) { struct resource *res = nfit_res->res + 1; @@ -254,11 +252,26 @@ void __wrap___release_region(struct resource *parent, resource_size_t start, __func__, start, n, res); else memset(res, 0, sizeof(*res)); - return; + return true; } } - __release_region(parent, start, n); + return false; +} + +void __wrap___release_region(struct resource *parent, resource_size_t start, + resource_size_t n) +{ + if (!nfit_test_release_region(parent, start, n)) + __release_region(parent, start, n); } EXPORT_SYMBOL(__wrap___release_region); +void __wrap___devm_release_region(struct device *dev, struct resource *parent, + resource_size_t start, resource_size_t n) +{ + if (!nfit_test_release_region(parent, start, n)) + __devm_release_region(dev, parent, start, n); +} +EXPORT_SYMBOL(__wrap___devm_release_region); + MODULE_LICENSE("GPL v2"); -- cgit From e3654eca70d63704c94a60a2aafc0b3c7b46a00b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 28 Apr 2016 16:17:07 -0700 Subject: nfit, libnvdimm: clarify "commands" vs "_DSMs" Clarify the distinction between "commands", the ioctls userspace calls to request the kernel take some action on a given dimm device, and "_DSMs", the actual function numbers used in the firmware interface to the DIMM. _DSMs are ACPI specific whereas commands are Linux kernel generic. This is in preparation for breaking the 1:1 implicit relationship between the kernel ioctl number space and the firmware specific function numbers. 
Cc: Jerry Hoemann Cc: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 21 +++++++++++++-------- drivers/acpi/nfit.h | 4 ++-- drivers/nvdimm/bus.c | 8 ++++---- drivers/nvdimm/core.c | 2 +- drivers/nvdimm/dimm_devs.c | 18 ++++++++++++------ drivers/nvdimm/nd-core.h | 2 +- include/linux/libnvdimm.h | 5 +++-- tools/testing/nvdimm/test/nfit.c | 27 ++++++++++++++------------- 8 files changed, 50 insertions(+), 37 deletions(-) (limited to 'tools/testing') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index d0f35e63640b..1b98e9dc6138 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -175,7 +175,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, union acpi_object in_obj, in_buf, *out_obj; struct device *dev = acpi_desc->dev; const char *cmd_name, *dimm_name; - unsigned long dsm_mask; + unsigned long cmd_mask; acpi_handle handle; const u8 *uuid; u32 offset; @@ -189,7 +189,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, return -ENOTTY; dimm_name = nvdimm_name(nvdimm); cmd_name = nvdimm_cmd_name(cmd); - dsm_mask = nfit_mem->dsm_mask; + cmd_mask = nvdimm_cmd_mask(nvdimm); desc = nd_cmd_dimm_desc(cmd); uuid = to_nfit_uuid(NFIT_DEV_DIMM); handle = adev->handle; @@ -197,7 +197,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct acpi_device *adev = to_acpi_dev(acpi_desc); cmd_name = nvdimm_bus_cmd_name(cmd); - dsm_mask = nd_desc->dsm_mask; + cmd_mask = nd_desc->cmd_mask; desc = nd_cmd_bus_desc(cmd); uuid = to_nfit_uuid(NFIT_DEV_BUS); handle = adev->handle; @@ -207,7 +207,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, if (!desc || (cmd && (desc->out_num + desc->in_num == 0))) return -ENOTTY; - if (!test_bit(cmd, &dsm_mask)) + if (!test_bit(cmd, &cmd_mask)) return -ENOTTY; in_obj.type = ACPI_TYPE_PACKAGE; @@ -926,7 +926,8 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM); int i; - 
nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en; + /* nfit test assumes 1:1 relationship between commands and dsms */ + nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en; adev = to_acpi_dev(acpi_desc); if (!adev) return 0; @@ -976,9 +977,13 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) if (rc) continue; + /* + * For now there is 1:1 relationship between cmd_mask and + * dsm_mask. + */ nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem, acpi_nfit_dimm_attribute_groups, - flags, &nfit_mem->dsm_mask); + flags, nfit_mem->dsm_mask); if (!nvdimm) return -ENOMEM; @@ -1007,14 +1012,14 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) struct acpi_device *adev; int i; - nd_desc->dsm_mask = acpi_desc->bus_dsm_force_en; + nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; adev = to_acpi_dev(acpi_desc); if (!adev) return; for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) - set_bit(i, &nd_desc->dsm_mask); + set_bit(i, &nd_desc->cmd_mask); } static ssize_t range_index_show(struct device *dev, diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index c75576b2d50e..332ee6f01662 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -132,8 +132,8 @@ struct acpi_nfit_desc { size_t ars_status_size; struct work_struct work; unsigned int cancel:1; - unsigned long dimm_dsm_force_en; - unsigned long bus_dsm_force_en; + unsigned long dimm_cmd_force_en; + unsigned long bus_cmd_force_en; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, void *iobuf, u64 len, int rw); }; diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 19f822d7f652..cb2042a12b76 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -589,24 +589,24 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, void __user *p = (void __user *) arg; struct device *dev = &nvdimm_bus->dev; const char *cmd_name, *dimm_name; - unsigned long dsm_mask; + unsigned 
long cmd_mask; void *buf; int rc, i; if (nvdimm) { desc = nd_cmd_dimm_desc(cmd); cmd_name = nvdimm_cmd_name(cmd); - dsm_mask = nvdimm->dsm_mask ? *(nvdimm->dsm_mask) : 0; + cmd_mask = nvdimm->cmd_mask; dimm_name = dev_name(&nvdimm->dev); } else { desc = nd_cmd_bus_desc(cmd); cmd_name = nvdimm_bus_cmd_name(cmd); - dsm_mask = nd_desc->dsm_mask; + cmd_mask = nd_desc->cmd_mask; dimm_name = "bus"; } if (!desc || (desc->out_num + desc->in_num == 0) || - !test_bit(cmd, &dsm_mask)) + !test_bit(cmd, &cmd_mask)) return -ENOTTY; /* fail write commands (when read-only) */ diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 182a93fe3712..e8688a13cf4f 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -251,7 +251,7 @@ static ssize_t commands_show(struct device *dev, struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; - for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG) + for_each_set_bit(cmd, &nd_desc->cmd_mask, BITS_PER_LONG) len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd)); len += sprintf(buf + len, "\n"); return len; diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index c56f88217924..79a35a02053c 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -37,9 +37,9 @@ static int __validate_dimm(struct nvdimm_drvdata *ndd) nvdimm = to_nvdimm(ndd->dev); - if (!nvdimm->dsm_mask) + if (!nvdimm->cmd_mask) return -ENXIO; - if (!test_bit(ND_CMD_GET_CONFIG_DATA, nvdimm->dsm_mask)) + if (!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) return -ENXIO; return 0; @@ -263,6 +263,12 @@ const char *nvdimm_name(struct nvdimm *nvdimm) } EXPORT_SYMBOL_GPL(nvdimm_name); +unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm) +{ + return nvdimm->cmd_mask; +} +EXPORT_SYMBOL_GPL(nvdimm_cmd_mask); + void *nvdimm_provider_data(struct nvdimm *nvdimm) { if (nvdimm) @@ -277,10 +283,10 @@ static ssize_t commands_show(struct device *dev, struct nvdimm *nvdimm 
= to_nvdimm(dev); int cmd, len = 0; - if (!nvdimm->dsm_mask) + if (!nvdimm->cmd_mask) return sprintf(buf, "\n"); - for_each_set_bit(cmd, nvdimm->dsm_mask, BITS_PER_LONG) + for_each_set_bit(cmd, &nvdimm->cmd_mask, BITS_PER_LONG) len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd)); len += sprintf(buf + len, "\n"); return len; @@ -340,7 +346,7 @@ EXPORT_SYMBOL_GPL(nvdimm_attribute_group); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, - unsigned long *dsm_mask) + unsigned long cmd_mask) { struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); struct device *dev; @@ -355,7 +361,7 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, } nvdimm->provider_data = provider_data; nvdimm->flags = flags; - nvdimm->dsm_mask = dsm_mask; + nvdimm->cmd_mask = cmd_mask; atomic_set(&nvdimm->busy, 0); dev = &nvdimm->dev; dev_set_name(dev, "nmem%d", nvdimm->id); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 1d1500f3d8b5..da0d322ed7cb 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -37,7 +37,7 @@ struct nvdimm_bus { struct nvdimm { unsigned long flags; void *provider_data; - unsigned long *dsm_mask; + unsigned long cmd_mask; struct device dev; atomic_t busy; int id; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index af31d1c6fdd7..0c3c30cbbea5 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -68,7 +68,7 @@ struct nd_mapping { struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; - unsigned long dsm_mask; + unsigned long cmd_mask; char *provider_name; ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); @@ -130,10 +130,11 @@ struct nd_region *to_nd_region(struct device *dev); struct nd_blk_region *to_nd_blk_region(struct device *dev); struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); const char 
*nvdimm_name(struct nvdimm *nvdimm); +unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm); void *nvdimm_provider_data(struct nvdimm *nvdimm); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, - unsigned long *dsm_mask); + unsigned long cmd_mask); const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 3187322eeed7..ed899a411c22 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -344,8 +344,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, if (nvdimm) { struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm); - if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) + if (!nfit_mem || !test_bit(cmd, &cmd_mask)) return -ENOTTY; /* lookup label space for the given dimm */ @@ -374,7 +375,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, } else { struct ars_state *ars_state = &t->ars_state; - if (!nd_desc || !test_bit(cmd, &nd_desc->dsm_mask)) + if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask)) return -ENOTTY; switch (cmd) { @@ -1251,13 +1252,13 @@ static void nfit_test0_setup(struct nfit_test *t) post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE); acpi_desc = &t->acpi_desc; - set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); - set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); - set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); - set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_GET_CONFIG_SIZE, 
&acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -1315,10 +1316,10 @@ static void nfit_test1_setup(struct nfit_test *t) post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE); acpi_desc = &t->acpi_desc; - set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, -- cgit From 6634fb06906f52a3a3125e88681a7fa6e353f31d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 27 Apr 2016 16:46:15 -0600 Subject: tools/testing/nvdimm: ND_CMD_CALL support Enable nfit_test to use nd_cmd_pkg marshaling. 
Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index ed899a411c22..e09a300eb8e0 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -336,6 +336,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); + unsigned int func = cmd; int i, rc = 0, __cmd_rc; if (!cmd_rc) @@ -346,7 +347,21 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm); - if (!nfit_mem || !test_bit(cmd, &cmd_mask)) + if (!nfit_mem) + return -ENOTTY; + + if (cmd == ND_CMD_CALL) { + struct nd_cmd_pkg *call_pkg = buf; + + buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out; + buf = (void *) call_pkg->nd_payload; + func = call_pkg->nd_command; + if (call_pkg->nd_family != nfit_mem->family) + return -ENOTTY; + } + + if (!test_bit(cmd, &cmd_mask) + || !test_bit(func, &nfit_mem->dsm_mask)) return -ENOTTY; /* lookup label space for the given dimm */ @@ -357,7 +372,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, if (i >= ARRAY_SIZE(handle)) return -ENXIO; - switch (cmd) { + switch (func) { case ND_CMD_GET_CONFIG_SIZE: rc = nfit_test_cmd_get_config_size(buf, buf_len); break; @@ -378,7 +393,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask)) return -ENOTTY; - switch (cmd) { + switch (func) { case ND_CMD_ARS_CAP: rc = nfit_test_cmd_ars_cap(buf, buf_len); break; -- cgit From 91e6f1ce8657795cec83a81090c20cbaa8337c68 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 9 May 2016 18:10:00 -0400 Subject: 
ftracetest: Add instance created, delete, read and enable event test Add a new ftrace test that creates three threads. One that creates and removes an ftrace instance, one that reads the instance, and one that enables and disables events in the instance. This is a stress test for accessing and removing instances at the same time. Signed-off-by: Steven Rostedt --- .../ftrace/test.d/instances/instance-event.tc | 143 +++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/instances/instance-event.tc (limited to 'tools/testing') diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc new file mode 100644 index 000000000000..5f2abd03f16b --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc @@ -0,0 +1,143 @@ +#!/bin/sh +# description: Test creation and deletion of trace instances while setting an event + +if [ ! -d instances ] ; then + echo "no instance directory with this kernel" + exit_unsupported; +fi + +fail() { # mesg + rmdir foo 2>/dev/null + echo $1 + set -e + exit $FAIL +} + +cd instances + +# we don't want to fail on error +set +e + +mkdir x +rmdir x +result=$? + +if [ $result -ne 0 ]; then + echo "instance rmdir not supported" + exit_unsupported +fi + +instance_slam() { + while :; do + mkdir foo 2> /dev/null + rmdir foo 2> /dev/null + done +} + +instance_read() { + while :; do + cat foo/trace 1> /dev/null 2>&1 + done +} + +instance_set() { + while :; do + echo 1 > foo/events/sched/sched_switch + done 2> /dev/null +} + +instance_slam & +p1=$! +echo $p1 + +instance_set & +p2=$! +echo $p2 + +instance_read & +p3=$! 
+echo $p3 + +sleep 1 + +kill -1 $p3 +kill -1 $p2 +kill -1 $p1 + +echo "Wait for processes to finish" +wait $p1 $p2 $p3 +echo "all processes finished, wait for cleanup" +sleep 1 + +mkdir foo +ls foo > /dev/null +rmdir foo +if [ -d foo ]; then + fail "foo still exists" +fi +exit 0 + + + + +instance_slam() { + while :; do + mkdir x + mkdir y + mkdir z + rmdir x + rmdir y + rmdir z + done 2>/dev/null +} + +instance_slam & +x=`jobs -l` +p1=`echo $x | cut -d' ' -f2` +echo $p1 + +instance_slam & +x=`jobs -l | tail -1` +p2=`echo $x | cut -d' ' -f2` +echo $p2 + +instance_slam & +x=`jobs -l | tail -1` +p3=`echo $x | cut -d' ' -f2` +echo $p3 + +instance_slam & +x=`jobs -l | tail -1` +p4=`echo $x | cut -d' ' -f2` +echo $p4 + +instance_slam & +x=`jobs -l | tail -1` +p5=`echo $x | cut -d' ' -f2` +echo $p5 + +ls -lR >/dev/null +sleep 1 + +kill -1 $p1 +kill -1 $p2 +kill -1 $p3 +kill -1 $p4 +kill -1 $p5 + +echo "Wait for processes to finish" +wait $p1 $p2 $p3 $p4 $p5 +echo "all processes finished, wait for cleanup" + +mkdir x y z +ls x y z +rmdir x y z +for d in x y z; do + if [ -d $d ]; then + fail "instance $d still exists" + fi +done + +set -e + +exit 0 -- cgit From cd03412a51ac4cb3001a8cdfae4560c9602f3387 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 11 Mar 2016 10:15:36 -0800 Subject: libnvdimm, dax: introduce device-dax infrastructure Device DAX is the device-centric analogue of Filesystem DAX (CONFIG_FS_DAX). It allows persistent memory ranges to be allocated and mapped without need of an intervening file system. This initial infrastructure arranges for a libnvdimm pfn-device to be represented as a different device-type so that it can be attached to a driver other than the pmem driver. 
Signed-off-by: Dan Williams --- drivers/nvdimm/Kconfig | 13 ++++++ drivers/nvdimm/Makefile | 1 + drivers/nvdimm/bus.c | 4 ++ drivers/nvdimm/claim.c | 2 + drivers/nvdimm/dax_devs.c | 99 +++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/namespace_devs.c | 19 +++++++- drivers/nvdimm/nd-core.h | 1 + drivers/nvdimm/nd.h | 25 ++++++++++ drivers/nvdimm/pfn_devs.c | 100 +++++++++++++++++++++++++++------------- drivers/nvdimm/region.c | 2 + drivers/nvdimm/region_devs.c | 29 ++++++++++++ include/uapi/linux/ndctl.h | 2 + tools/testing/nvdimm/Kbuild | 1 + 13 files changed, 264 insertions(+), 34 deletions(-) create mode 100644 drivers/nvdimm/dax_devs.c (limited to 'tools/testing') diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 53c11621d5b1..7c8a3bf07884 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -88,4 +88,17 @@ config NVDIMM_PFN Select Y if unsure +config NVDIMM_DAX + bool "NVDIMM DAX: Raw access to persistent memory" + default LIBNVDIMM + depends on NVDIMM_PFN + help + Support raw device dax access to a persistent memory + namespace. For environments that want to hard partition + persistent memory, this capability provides a mechanism to + sub-divide a namespace into character devices that can only be + accessed via DAX (mmap(2)). 
+ + Select Y if unsure + endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index ea84d3c4e8e5..909554c3f955 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -23,3 +23,4 @@ libnvdimm-y += label.o libnvdimm-$(CONFIG_ND_CLAIM) += claim.o libnvdimm-$(CONFIG_BTT) += btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o +libnvdimm-$(CONFIG_NVDIMM_DAX) += dax_devs.o diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 19f822d7f652..97589e3cb852 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -40,6 +40,8 @@ static int to_nd_device_type(struct device *dev) return ND_DEVICE_REGION_PMEM; else if (is_nd_blk(dev)) return ND_DEVICE_REGION_BLK; + else if (is_nd_dax(dev)) + return ND_DEVICE_DAX_PMEM; else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent)) return nd_region_to_nstype(to_nd_region(dev->parent)); @@ -246,6 +248,8 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie) void __nd_device_register(struct device *dev) { + if (!dev) + return; dev->bus = &nvdimm_bus_type; get_device(dev); async_schedule_domain(nd_async_device_register, dev, diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 6bbd0a36994a..5f53db59a058 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -85,6 +85,8 @@ static bool is_idle(struct device *dev, struct nd_namespace_common *ndns) seed = nd_region->btt_seed; else if (is_nd_pfn(dev)) seed = nd_region->pfn_seed; + else if (is_nd_dax(dev)) + seed = nd_region->dax_seed; if (seed == dev || ndns || dev->driver) return false; diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c new file mode 100644 index 000000000000..f90f7549e7f4 --- /dev/null +++ b/drivers/nvdimm/dax_devs.c @@ -0,0 +1,99 @@ +/* + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include "nd-core.h" +#include "nd.h" + +static void nd_dax_release(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_dax *nd_dax = to_nd_dax(dev); + struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; + + dev_dbg(dev, "%s\n", __func__); + nd_detach_ndns(dev, &nd_pfn->ndns); + ida_simple_remove(&nd_region->dax_ida, nd_pfn->id); + kfree(nd_pfn->uuid); + kfree(nd_dax); +} + +static struct device_type nd_dax_device_type = { + .name = "nd_dax", + .release = nd_dax_release, +}; + +bool is_nd_dax(struct device *dev) +{ + return dev ? 
dev->type == &nd_dax_device_type : false; +} +EXPORT_SYMBOL(is_nd_dax); + +struct nd_dax *to_nd_dax(struct device *dev) +{ + struct nd_dax *nd_dax = container_of(dev, struct nd_dax, nd_pfn.dev); + + WARN_ON(!is_nd_dax(dev)); + return nd_dax; +} +EXPORT_SYMBOL(to_nd_dax); + +static const struct attribute_group *nd_dax_attribute_groups[] = { + &nd_pfn_attribute_group, + &nd_device_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +static struct nd_dax *nd_dax_alloc(struct nd_region *nd_region) +{ + struct nd_pfn *nd_pfn; + struct nd_dax *nd_dax; + struct device *dev; + + nd_dax = kzalloc(sizeof(*nd_dax), GFP_KERNEL); + if (!nd_dax) + return NULL; + + nd_pfn = &nd_dax->nd_pfn; + nd_pfn->id = ida_simple_get(&nd_region->dax_ida, 0, 0, GFP_KERNEL); + if (nd_pfn->id < 0) { + kfree(nd_dax); + return NULL; + } + + dev = &nd_pfn->dev; + dev_set_name(dev, "dax%d.%d", nd_region->id, nd_pfn->id); + dev->groups = nd_dax_attribute_groups; + dev->type = &nd_dax_device_type; + dev->parent = &nd_region->dev; + + return nd_dax; +} + +struct device *nd_dax_create(struct nd_region *nd_region) +{ + struct device *dev = NULL; + struct nd_dax *nd_dax; + + if (!is_nd_pmem(&nd_region->dev)) + return NULL; + + nd_dax = nd_dax_alloc(nd_region); + if (nd_dax) + dev = nd_pfn_devinit(&nd_dax->nd_pfn, NULL); + __nd_device_register(dev); + return dev; +} diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index e5ad5162bf34..c5e3196c45b0 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1288,6 +1288,8 @@ static ssize_t mode_show(struct device *dev, mode = "safe"; else if (claim && is_nd_pfn(claim)) mode = "memory"; + else if (claim && is_nd_dax(claim)) + mode = "dax"; else if (!claim && pmem_should_map_pages(dev)) mode = "memory"; else @@ -1379,14 +1381,17 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) { struct nd_btt *nd_btt = is_nd_btt(dev) ? 
to_nd_btt(dev) : NULL; struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL; + struct nd_dax *nd_dax = is_nd_dax(dev) ? to_nd_dax(dev) : NULL; struct nd_namespace_common *ndns = NULL; resource_size_t size; - if (nd_btt || nd_pfn) { + if (nd_btt || nd_pfn || nd_dax) { if (nd_btt) ndns = nd_btt->ndns; else if (nd_pfn) ndns = nd_pfn->ndns; + else if (nd_dax) + ndns = nd_dax->nd_pfn.ndns; if (!ndns) return ERR_PTR(-ENODEV); @@ -1779,6 +1784,18 @@ void nd_region_create_blk_seed(struct nd_region *nd_region) nd_device_register(nd_region->ns_seed); } +void nd_region_create_dax_seed(struct nd_region *nd_region) +{ + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + nd_region->dax_seed = nd_dax_create(nd_region); + /* + * Seed creation failures are not fatal, provisioning is simply + * disabled until memory becomes available + */ + if (!nd_region->dax_seed) + dev_err(&nd_region->dev, "failed to create dax namespace\n"); +} + void nd_region_create_pfn_seed(struct nd_region *nd_region) { WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 1d1500f3d8b5..cb65308c0329 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -54,6 +54,7 @@ struct nd_region; void nd_region_create_blk_seed(struct nd_region *nd_region); void nd_region_create_btt_seed(struct nd_region *nd_region); void nd_region_create_pfn_seed(struct nd_region *nd_region); +void nd_region_create_dax_seed(struct nd_region *nd_region); void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev); int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 6c36509662e4..46910b8f32b1 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -101,10 +101,12 @@ struct nd_region { struct ida ns_ida; struct ida btt_ida; struct ida pfn_ida; + struct ida dax_ida; unsigned long flags; struct device 
*ns_seed; struct device *btt_seed; struct device *pfn_seed; + struct device *dax_seed; u16 ndr_mappings; u64 ndr_size; u64 ndr_start; @@ -161,6 +163,10 @@ struct nd_pfn { struct nd_namespace_common *ndns; }; +struct nd_dax { + struct nd_pfn nd_pfn; +}; + enum nd_async_mode { ND_SYNC, ND_ASYNC, @@ -224,7 +230,10 @@ struct nd_pfn *to_nd_pfn(struct device *dev); int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_pfn(struct device *dev); struct device *nd_pfn_create(struct nd_region *nd_region); +struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, + struct nd_namespace_common *ndns); int nd_pfn_validate(struct nd_pfn *nd_pfn); +extern struct attribute_group nd_pfn_attribute_group; #else static inline int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) @@ -248,6 +257,22 @@ static inline int nd_pfn_validate(struct nd_pfn *nd_pfn) } #endif +struct nd_dax *to_nd_dax(struct device *dev); +#if IS_ENABLED(CONFIG_NVDIMM_DAX) +bool is_nd_dax(struct device *dev); +struct device *nd_dax_create(struct nd_region *nd_region); +#else +static inline bool is_nd_dax(struct device *dev) +{ + return false; +} + +static inline struct device *nd_dax_create(struct nd_region *nd_region) +{ + return NULL; +} +#endif + struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index e8693fe65e49..6ade2eb7615d 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -54,10 +54,29 @@ struct nd_pfn *to_nd_pfn(struct device *dev) } EXPORT_SYMBOL(to_nd_pfn); +static struct nd_pfn *to_nd_pfn_safe(struct device *dev) +{ + /* + * pfn device attributes are re-used by dax device instances, so we + * need to be careful to correct device-to-nd_pfn conversion. + */ + if (is_nd_pfn(dev)) + return to_nd_pfn(dev); + + if (is_nd_dax(dev)) { + struct nd_dax *nd_dax = to_nd_dax(dev); + + return &nd_dax->nd_pfn; + } + + WARN_ON(1); + return NULL; +} + static ssize_t mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); switch (nd_pfn->mode) { case PFN_MODE_RAM: @@ -72,7 +91,7 @@ static ssize_t mode_show(struct device *dev, static ssize_t mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc = 0; device_lock(dev); @@ -106,7 +125,7 @@ static DEVICE_ATTR_RW(mode); static ssize_t align_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); return sprintf(buf, "%lx\n", nd_pfn->align); } @@ -134,7 +153,7 @@ static ssize_t __align_store(struct nd_pfn *nd_pfn, const char *buf) static ssize_t align_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -152,7 +171,7 @@ static DEVICE_ATTR_RW(align); static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); if (nd_pfn->uuid) return sprintf(buf, 
"%pUb\n", nd_pfn->uuid); @@ -162,7 +181,7 @@ static ssize_t uuid_show(struct device *dev, static ssize_t uuid_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -178,7 +197,7 @@ static DEVICE_ATTR_RW(uuid); static ssize_t namespace_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; nvdimm_bus_lock(dev); @@ -191,7 +210,7 @@ static ssize_t namespace_show(struct device *dev, static ssize_t namespace_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -209,7 +228,7 @@ static DEVICE_ATTR_RW(namespace); static ssize_t resource_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -235,7 +254,7 @@ static DEVICE_ATTR_RO(resource); static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -270,7 +289,7 @@ static struct attribute *nd_pfn_attributes[] = { NULL, }; -static struct attribute_group nd_pfn_attribute_group = { +struct attribute_group nd_pfn_attribute_group = { .attrs = nd_pfn_attributes, }; @@ -281,15 +300,31 @@ static const struct attribute_group *nd_pfn_attribute_groups[] = { NULL, }; -static struct device *__nd_pfn_create(struct nd_region *nd_region, +struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, struct nd_namespace_common *ndns) { - struct nd_pfn *nd_pfn; - struct device *dev; + struct device *dev = &nd_pfn->dev; - /* we can only create pages 
for contiguous ranged of pmem */ - if (!is_nd_pmem(&nd_region->dev)) + if (!nd_pfn) + return NULL; + + nd_pfn->mode = PFN_MODE_NONE; + nd_pfn->align = HPAGE_SIZE; + dev = &nd_pfn->dev; + device_initialize(&nd_pfn->dev); + if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { + dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", + __func__, dev_name(ndns->claim)); + put_device(dev); return NULL; + } + return dev; +} + +static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region) +{ + struct nd_pfn *nd_pfn; + struct device *dev; nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); if (!nd_pfn) @@ -301,29 +336,27 @@ static struct device *__nd_pfn_create(struct nd_region *nd_region, return NULL; } - nd_pfn->mode = PFN_MODE_NONE; - nd_pfn->align = HPAGE_SIZE; dev = &nd_pfn->dev; dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); - dev->parent = &nd_region->dev; - dev->type = &nd_pfn_device_type; dev->groups = nd_pfn_attribute_groups; - device_initialize(&nd_pfn->dev); - if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { - dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", - __func__, dev_name(ndns->claim)); - put_device(dev); - return NULL; - } - return dev; + dev->type = &nd_pfn_device_type; + dev->parent = &nd_region->dev; + + return nd_pfn; } struct device *nd_pfn_create(struct nd_region *nd_region) { - struct device *dev = __nd_pfn_create(nd_region, NULL); + struct nd_pfn *nd_pfn; + struct device *dev; + + if (!is_nd_pmem(&nd_region->dev)) + return NULL; + + nd_pfn = nd_pfn_alloc(nd_region); + dev = nd_pfn_devinit(nd_pfn, NULL); - if (dev) - __nd_device_register(dev); + __nd_device_register(dev); return dev; } @@ -423,7 +456,8 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) return -ENODEV; nvdimm_bus_lock(&ndns->dev); - pfn_dev = __nd_pfn_create(nd_region, ndns); + nd_pfn = nd_pfn_alloc(nd_region); + pfn_dev = nd_pfn_devinit(nd_pfn, ndns); nvdimm_bus_unlock(&ndns->dev); if (!pfn_dev) return 
-ENOMEM; diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 4b7715e29cff..05a912359939 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -54,6 +54,7 @@ static int nd_region_probe(struct device *dev) nd_region->btt_seed = nd_btt_create(nd_region); nd_region->pfn_seed = nd_pfn_create(nd_region); + nd_region->dax_seed = nd_dax_create(nd_region); if (err == 0) return 0; @@ -86,6 +87,7 @@ static int nd_region_remove(struct device *dev) nd_region->ns_seed = NULL; nd_region->btt_seed = NULL; nd_region->pfn_seed = NULL; + nd_region->dax_seed = NULL; dev_set_drvdata(dev, NULL); nvdimm_bus_unlock(dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 139bf71ca549..9e1b054e0e61 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -306,6 +306,23 @@ static ssize_t pfn_seed_show(struct device *dev, } static DEVICE_ATTR_RO(pfn_seed); +static ssize_t dax_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->dax_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->dax_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(dax_seed); + static ssize_t read_only_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -335,6 +352,7 @@ static struct attribute *nd_region_attributes[] = { &dev_attr_mappings.attr, &dev_attr_btt_seed.attr, &dev_attr_pfn_seed.attr, + &dev_attr_dax_seed.attr, &dev_attr_read_only.attr, &dev_attr_set_cookie.attr, &dev_attr_available_size.attr, @@ -353,6 +371,9 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) if (!is_nd_pmem(dev) && a == &dev_attr_pfn_seed.attr) return 0; + if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr) + return 0; + if (a != &dev_attr_set_cookie.attr && a != &dev_attr_available_size.attr) return a->mode; @@ 
-441,6 +462,13 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, nd_region_create_pfn_seed(nd_region); nvdimm_bus_unlock(dev); } + if (is_nd_dax(dev) && probe) { + nd_region = to_nd_region(dev->parent); + nvdimm_bus_lock(dev); + if (nd_region->dax_seed == dev) + nd_region_create_dax_seed(nd_region); + nvdimm_bus_unlock(dev); + } } void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev) @@ -718,6 +746,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, ida_init(&nd_region->ns_ida); ida_init(&nd_region->btt_ida); ida_init(&nd_region->pfn_ida); + ida_init(&nd_region->dax_ida); dev = &nd_region->dev; dev_set_name(dev, "region%d", nd_region->id); dev->parent = &nvdimm_bus->dev; diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 7cc28ab05b87..4f29d247f709 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -206,6 +206,7 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_DEVICE_NAMESPACE_IO 4 /* legacy persistent memory */ #define ND_DEVICE_NAMESPACE_PMEM 5 /* PMEM namespace (may alias with BLK) */ #define ND_DEVICE_NAMESPACE_BLK 6 /* BLK namespace (may alias with PMEM) */ +#define ND_DEVICE_DAX_PMEM 7 /* Device DAX interface to pmem */ enum nd_driver_flags { ND_DRIVER_DIMM = 1 << ND_DEVICE_DIMM, @@ -214,6 +215,7 @@ enum nd_driver_flags { ND_DRIVER_NAMESPACE_IO = 1 << ND_DEVICE_NAMESPACE_IO, ND_DRIVER_NAMESPACE_PMEM = 1 << ND_DEVICE_NAMESPACE_PMEM, ND_DRIVER_NAMESPACE_BLK = 1 << ND_DEVICE_NAMESPACE_BLK, + ND_DRIVER_DAX_PMEM = 1 << ND_DEVICE_DAX_PMEM, }; enum { diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index d5bc8c080b44..5ff6d3c126a9 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -50,6 +50,7 @@ libnvdimm-y += $(NVDIMM_SRC)/label.o libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += 
$(NVDIMM_SRC)/pfn_devs.o +libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o libnvdimm-y += config_check.o obj-m += test/ -- cgit From 438517ec78fa4e9db9892e749ed50e2a21f5f17f Mon Sep 17 00:00:00 2001 From: Chris Smart Date: Mon, 2 May 2016 13:51:38 +1000 Subject: selftests/powerpc: Test cp_abort during context switch Test that performing a copy paste sequence in userspace on P9 does not result in a leak of the copy into the paste of another process. This is based on Anton Blanchard's context_switch benchmarking code. It sets up two processes tied to the same CPU, one which copies and one which pastes. The paste should never succeed and the test fails if it does. This is a test for commit, "8a64904 powerpc: Add support for userspace P9 copy paste." Patch created with much assistance from Michael Neuling Signed-off-by: Chris Smart Reviewed-by: Cyril Bur Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/Makefile | 1 + .../selftests/powerpc/context_switch/.gitignore | 1 + .../selftests/powerpc/context_switch/Makefile | 10 ++ .../selftests/powerpc/context_switch/cp_abort.c | 110 +++++++++++++++++++++ tools/testing/selftests/powerpc/utils.h | 7 ++ 5 files changed, 129 insertions(+) create mode 100644 tools/testing/selftests/powerpc/context_switch/.gitignore create mode 100644 tools/testing/selftests/powerpc/context_switch/Makefile create mode 100644 tools/testing/selftests/powerpc/context_switch/cp_abort.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index b08f77cbe31b..4ca83fe80654 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -14,6 +14,7 @@ export CFLAGS SUB_DIRS = benchmarks \ copyloops \ + context_switch \ dscr \ mm \ pmu \ diff --git a/tools/testing/selftests/powerpc/context_switch/.gitignore b/tools/testing/selftests/powerpc/context_switch/.gitignore new file mode 100644 index 
000000000000..c1431af7b51c --- /dev/null +++ b/tools/testing/selftests/powerpc/context_switch/.gitignore @@ -0,0 +1 @@ +cp_abort diff --git a/tools/testing/selftests/powerpc/context_switch/Makefile b/tools/testing/selftests/powerpc/context_switch/Makefile new file mode 100644 index 000000000000..e164d1466466 --- /dev/null +++ b/tools/testing/selftests/powerpc/context_switch/Makefile @@ -0,0 +1,10 @@ +TEST_PROGS := cp_abort + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c ../utils.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/context_switch/cp_abort.c b/tools/testing/selftests/powerpc/context_switch/cp_abort.c new file mode 100644 index 000000000000..5a5b55afda0e --- /dev/null +++ b/tools/testing/selftests/powerpc/context_switch/cp_abort.c @@ -0,0 +1,110 @@ +/* + * Adapted from Anton Blanchard's context switch microbenchmark. + * + * Copyright 2009, Anton Blanchard, IBM Corporation. + * Copyright 2016, Mikey Neuling, Chris Smart, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This program tests the copy paste abort functionality of a P9 + * (or later) by setting up two processes on the same CPU, one + * which executes the copy instruction and the other which + * executes paste. + * + * The paste instruction should never succeed, as the cp_abort + * instruction is called by the kernel during a context switch. + * + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include "utils.h" +#include + +#define READ_FD 0 +#define WRITE_FD 1 + +#define NUM_LOOPS 1000 + +/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 4.4. 
*/ +#define PASTE(RA, RB, L, RC) \ + .long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) | (RC) << (31-31)) + +int paste(void *i) +{ + int cr; + + asm volatile(str(PASTE(0, %1, 1, 1))";" + "mfcr %0;" + : "=r" (cr) + : "b" (i) + : "memory" + ); + return cr; +} + +/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 4.4. */ +#define COPY(RA, RB, L) \ + .long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10)) + +void copy(void *i) +{ + asm volatile(str(COPY(0, %0, 1))";" + : + : "b" (i) + : "memory" + ); +} + +int test_cp_abort(void) +{ + /* 128 bytes for a full cache line */ + char buf[128] __cacheline_aligned; + cpu_set_t cpuset; + int fd1[2], fd2[2], pid; + char c; + + /* only run this test on a P9 or later */ + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); + + /* + * Run both processes on the same CPU, so that copy is more likely + * to leak into a paste. + */ + CPU_ZERO(&cpuset); + CPU_SET(pick_online_cpu(), &cpuset); + FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset)); + + FAIL_IF(pipe(fd1) || pipe(fd2)); + + pid = fork(); + FAIL_IF(pid < 0); + + if (!pid) { + for (int i = 0; i < NUM_LOOPS; i++) { + FAIL_IF((write(fd1[WRITE_FD], &c, 1)) != 1); + FAIL_IF((read(fd2[READ_FD], &c, 1)) != 1); + /* A paste succeeds if CR0 EQ bit is set */ + FAIL_IF(paste(buf) & 0x20000000); + } + } else { + for (int i = 0; i < NUM_LOOPS; i++) { + FAIL_IF((read(fd1[READ_FD], &c, 1)) != 1); + copy(buf); + FAIL_IF((write(fd2[WRITE_FD], &c, 1) != 1)); + } + } + return 0; + +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_cp_abort, "cp_abort"); +} diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h index 175ac6ad10dd..3b370deafb62 100644 --- a/tools/testing/selftests/powerpc/utils.h +++ b/tools/testing/selftests/powerpc/utils.h @@ -6,6 +6,8 @@ #ifndef _SELFTESTS_POWERPC_UTILS_H #define _SELFTESTS_POWERPC_UTILS_H +#define __cacheline_aligned 
__attribute__((aligned(128))) + #include #include #include @@ -54,4 +56,9 @@ do { \ #define _str(s) #s #define str(s) _str(s) +/* POWER9 feature */ +#ifndef PPC_FEATURE2_ARCH_3_00 +#define PPC_FEATURE2_ARCH_3_00 0x00800000 +#endif + #endif /* _SELFTESTS_POWERPC_UTILS_H */ -- cgit From 2f67798c1f9f35ba576ac0639b9b648b9b2033f6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 2 May 2016 14:54:29 +1000 Subject: selftests/powerpc: Fix subpage_prot test to return !0 on failure It's helpful for automated testing if the test returns error codes back to the calling program. While we're here fix all the usages of %p to remove the double 0x, ie. %p already includes 0x. Signed-off-by: Michael Ellerman Reviewed-by: Aneesh Kumar K.V --- tools/testing/selftests/powerpc/mm/subpage_prot.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c index 440180ff8089..35ade7406dcd 100644 --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -73,7 +73,7 @@ static inline void check_faulted(void *addr, long page, long subpage, int write) want_fault |= (subpage == ((page + 1) % 16)); if (faulted != want_fault) { - printf("Failed at 0x%p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n", + printf("Failed at %p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n", addr, page, subpage, write, want_fault ? "fault" : "pass", faulted ? 
"fault" : "pass"); @@ -82,7 +82,7 @@ static inline void check_faulted(void *addr, long page, long subpage, int write) if (faulted) { if (dar != addr) { - printf("Fault expected at 0x%p and happened at 0x%p !\n", + printf("Fault expected at %p and happened at %p !\n", addr, dar); } faulted = 0; @@ -162,7 +162,7 @@ int test_anon(void) mallocblock = (void *)align; - printf("allocated malloc block of 0x%lx bytes at 0x%p\n", + printf("allocated malloc block of 0x%lx bytes at %p\n", mallocsize, mallocblock); printf("testing malloc block...\n"); @@ -197,7 +197,7 @@ int test_file(void) perror("failed to map file"); return 1; } - printf("allocated %s for 0x%lx bytes at 0x%p\n", + printf("allocated %s for 0x%lx bytes at %p\n", file_name, filesize, fileblock); printf("testing file map...\n"); @@ -207,14 +207,16 @@ int test_file(void) int main(int argc, char *argv[]) { - test_harness(test_anon, "subpage_prot_anon"); + int rc; + + rc = test_harness(test_anon, "subpage_prot_anon"); + if (rc) + return rc; if (argc > 1) file_name = argv[1]; else file_name = "tempfile"; - test_harness(test_file, "subpage_prot_file"); - - return 0; + return test_harness(test_file, "subpage_prot_file"); } -- cgit From 2d59b3b25659463a24f05df367574d90b3cd7145 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Wed, 23 Dec 2015 16:49:50 +1100 Subject: selftests/powerpc: Make reg.h common to all powerpc selftests Currently there is a reg.h in pmu/ebb that has defines that are useful in other powerpc selftests so move this up into selftests/powerpc folder. Also include in utils.h - as this is often used in self tests. Add in some other useful register defines. 
Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/pmu/ebb/ebb.c | 1 - tools/testing/selftests/powerpc/pmu/ebb/reg.h | 49 ------------------- .../selftests/powerpc/pmu/ebb/reg_access_test.c | 1 - tools/testing/selftests/powerpc/reg.h | 55 ++++++++++++++++++++++ tools/testing/selftests/powerpc/utils.h | 1 + 5 files changed, 56 insertions(+), 51 deletions(-) delete mode 100644 tools/testing/selftests/powerpc/pmu/ebb/reg.h create mode 100644 tools/testing/selftests/powerpc/reg.h (limited to 'tools/testing') diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c index e67452f1bcff..46681fec549b 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c @@ -15,7 +15,6 @@ #include #include "trace.h" -#include "reg.h" #include "ebb.h" diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg.h b/tools/testing/selftests/powerpc/pmu/ebb/reg.h deleted file mode 100644 index 5921b0dfe2e9..000000000000 --- a/tools/testing/selftests/powerpc/pmu/ebb/reg.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2014, Michael Ellerman, IBM Corp. - * Licensed under GPLv2. 
- */ - -#ifndef _SELFTESTS_POWERPC_REG_H -#define _SELFTESTS_POWERPC_REG_H - -#define __stringify_1(x) #x -#define __stringify(x) __stringify_1(x) - -#define mfspr(rn) ({unsigned long rval; \ - asm volatile("mfspr %0," __stringify(rn) \ - : "=r" (rval)); rval; }) -#define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \ - : "r" ((unsigned long)(v)) \ - : "memory") - -#define mb() asm volatile("sync" : : : "memory"); - -#define SPRN_MMCR2 769 -#define SPRN_MMCRA 770 -#define SPRN_MMCR0 779 -#define MMCR0_PMAO 0x00000080 -#define MMCR0_PMAE 0x04000000 -#define MMCR0_FC 0x80000000 -#define SPRN_EBBHR 804 -#define SPRN_EBBRR 805 -#define SPRN_BESCR 806 /* Branch event status & control register */ -#define SPRN_BESCRS 800 /* Branch event status & control set (1 bits set to 1) */ -#define SPRN_BESCRSU 801 /* Branch event status & control set upper */ -#define SPRN_BESCRR 802 /* Branch event status & control REset (1 bits set to 0) */ -#define SPRN_BESCRRU 803 /* Branch event status & control REset upper */ - -#define BESCR_PMEO 0x1 /* PMU Event-based exception Occurred */ -#define BESCR_PME (0x1ul << 32) /* PMU Event-based exception Enable */ - -#define SPRN_PMC1 771 -#define SPRN_PMC2 772 -#define SPRN_PMC3 773 -#define SPRN_PMC4 774 -#define SPRN_PMC5 775 -#define SPRN_PMC6 776 - -#define SPRN_SIAR 780 -#define SPRN_SDAR 781 -#define SPRN_SIER 768 - -#endif /* _SELFTESTS_POWERPC_REG_H */ diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c index 5b1188f10c15..f923228bca22 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c @@ -7,7 +7,6 @@ #include #include "ebb.h" -#include "reg.h" /* diff --git a/tools/testing/selftests/powerpc/reg.h b/tools/testing/selftests/powerpc/reg.h new file mode 100644 index 000000000000..65bfdeeebdee --- /dev/null +++ b/tools/testing/selftests/powerpc/reg.h @@ -0,0 +1,55 
@@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#ifndef _SELFTESTS_POWERPC_REG_H +#define _SELFTESTS_POWERPC_REG_H + +#define __stringify_1(x) #x +#define __stringify(x) __stringify_1(x) + +#define mfspr(rn) ({unsigned long rval; \ + asm volatile("mfspr %0," _str(rn) \ + : "=r" (rval)); rval; }) +#define mtspr(rn, v) asm volatile("mtspr " _str(rn) ",%0" : \ + : "r" ((unsigned long)(v)) \ + : "memory") + +#define mb() asm volatile("sync" : : : "memory"); + +#define SPRN_MMCR2 769 +#define SPRN_MMCRA 770 +#define SPRN_MMCR0 779 +#define MMCR0_PMAO 0x00000080 +#define MMCR0_PMAE 0x04000000 +#define MMCR0_FC 0x80000000 +#define SPRN_EBBHR 804 +#define SPRN_EBBRR 805 +#define SPRN_BESCR 806 /* Branch event status & control register */ +#define SPRN_BESCRS 800 /* Branch event status & control set (1 bits set to 1) */ +#define SPRN_BESCRSU 801 /* Branch event status & control set upper */ +#define SPRN_BESCRR 802 /* Branch event status & control REset (1 bits set to 0) */ +#define SPRN_BESCRRU 803 /* Branch event status & control REset upper */ + +#define BESCR_PMEO 0x1 /* PMU Event-based exception Occurred */ +#define BESCR_PME (0x1ul << 32) /* PMU Event-based exception Enable */ + +#define SPRN_PMC1 771 +#define SPRN_PMC2 772 +#define SPRN_PMC3 773 +#define SPRN_PMC4 774 +#define SPRN_PMC5 775 +#define SPRN_PMC6 776 + +#define SPRN_SIAR 780 +#define SPRN_SDAR 781 +#define SPRN_SIER 768 + +#define SPRN_TEXASR 0x82 +#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ +#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ +#define TEXASR_FS 0x08000000 +#define SPRN_TAR 0x32f + +#endif /* _SELFTESTS_POWERPC_REG_H */ diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h index 3b370deafb62..a985cfaa535e 100644 --- a/tools/testing/selftests/powerpc/utils.h +++ b/tools/testing/selftests/powerpc/utils.h @@ -11,6 +11,7 @@ #include #include #include +#include "reg.h" /* Avoid 
headaches with PRI?64 - just use %ll? always */ typedef unsigned long long u64; -- cgit From da3ddc3b5fea695f7b2fa89c4ca17dfd529293d2 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Wed, 23 Dec 2015 16:49:51 +1100 Subject: selftests/powerpc: Standardise TM calls Currently tbegin, tend etc are written as opcodes or asm instructions. So standardise these to asm instructions. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/tm-resched-dscr.c | 16 +++++----------- tools/testing/selftests/powerpc/tm/tm-signal-stack.c | 4 ++-- 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c index 8fde93d6021f..d9c49f41515e 100644 --- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c +++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c @@ -31,12 +31,6 @@ #include "utils.h" #include "tm.h" -#define TBEGIN ".long 0x7C00051D ;" -#define TEND ".long 0x7C00055D ;" -#define TCHECK ".long 0x7C00059C ;" -#define TSUSPEND ".long 0x7C0005DD ;" -#define TRESUME ".long 0x7C2005DD ;" -#define SPRN_TEXASR 0x82 #define SPRN_DSCR 0x03 int test_body(void) @@ -55,13 +49,13 @@ int test_body(void) "mtspr %[sprn_dscr], 3;" /* start and suspend a transaction */ - TBEGIN + "tbegin.;" "beq 1f;" - TSUSPEND + "tsuspend.;" /* hard loop until the transaction becomes doomed */ "2: ;" - TCHECK + "tcheck 0;" "bc 4, 0, 2b;" /* record DSCR and TEXASR */ @@ -70,8 +64,8 @@ int test_body(void) "mfspr 3, %[sprn_texasr];" "std 3, %[texasr];" - TRESUME - TEND + "tresume.;" + "tend.;" "li %[rv], 0;" "1: ;" : [rv]"=r"(rv), [dscr2]"=m"(dscr2), [texasr]"=m"(texasr) diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c index e44a238c1d77..1f0eb567438d 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c +++ 
b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c @@ -60,9 +60,9 @@ int tm_signal_stack() exit(1); asm volatile("li 1, 0 ;" /* stack ptr == NULL */ "1:" - ".long 0x7C00051D ;" /* tbegin */ + "tbegin.;" "beq 1b ;" /* retry forever */ - ".long 0x7C0005DD ; ;" /* tsuspend */ + "tsuspend.;" "ld 2, 0(1) ;" /* trigger segv" */ : : : "memory"); -- cgit From d95be4ca3e457044be55c8b1c1b5ac64f17d8a92 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Wed, 23 Dec 2015 16:49:52 +1100 Subject: selftests/powerpc: Add test for forking inside transaction This test does a fork syscall inside a transaction. Basic sniff test to see if we can enter the kernel during a transaction. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/.gitignore | 1 + tools/testing/selftests/powerpc/tm/Makefile | 2 +- tools/testing/selftests/powerpc/tm/tm-fork.c | 42 +++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/tm/tm-fork.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 7d0f14b8cb2e..d216eddcd493 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -3,3 +3,4 @@ tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy +tm-fork diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 737f72c964e6..2db475b12720 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,4 +1,4 @@ -TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy +TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork all: $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/tm/tm-fork.c b/tools/testing/selftests/powerpc/tm/tm-fork.c new file mode 100644 index 
000000000000..8d48579b7778 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-fork.c @@ -0,0 +1,42 @@ +/* + * Copyright 2015, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * + * Edited: Rashmica Gupta, Nov 2015 + * + * This test does a fork syscall inside a transaction. Basic sniff test + * to see if we can enter the kernel during a transaction. + */ + +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tm.h" + +int test_fork(void) +{ + SKIP_IF(!have_htm()); + + asm __volatile__( + "tbegin.;" + "blt 1f; " + "li 0, 2;" /* fork syscall */ + "sc ;" + "tend.;" + "1: ;" + : : : "memory", "r0"); + /* If we reach here, we've passed. Otherwise we've probably crashed + * the kernel */ + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_fork, "tm_fork"); +} -- cgit From dbccb4940c9c9c8c93fcf733445e96dd2d6890d6 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Wed, 23 Dec 2015 16:49:53 +1100 Subject: selftests/powerpc: Add TM test to check if TAR is corrupted If the transaction is aborted, the TAR should be rolled back to the checkpointed value before the transaction began. The value written to the TAR when the transaction is suspended should only remain there if the transaction completes successfully. 
Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/.gitignore | 1 + tools/testing/selftests/powerpc/tm/Makefile | 2 +- tools/testing/selftests/powerpc/tm/tm-tar.c | 90 +++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/tm/tm-tar.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index d216eddcd493..2ac376410c70 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -4,3 +4,4 @@ tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork +tm-tar diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 2db475b12720..cb4b3bf57a4d 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,4 +1,4 @@ -TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork +TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork tm-tar all: $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/tm/tm-tar.c b/tools/testing/selftests/powerpc/tm/tm-tar.c new file mode 100644 index 000000000000..2d2fcc2b7a60 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-tar.c @@ -0,0 +1,90 @@ +/* + * Copyright 2015, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * Original: Michael Neuling 19/7/2013 + * Edited: Rashmica Gupta 01/12/2015 + * + * Do some transactions, see if the tar is corrupted. + * If the transaction is aborted, the TAR should be rolled back to the + * checkpointed value before the transaction began. The value written to + * TAR in suspended mode should only remain in TAR if the transaction + * completes. 
+ */ + +#include +#include +#include +#include + +#include "tm.h" +#include "utils.h" + +int num_loops = 10000; + +int test_tar(void) +{ + int i; + + SKIP_IF(!have_htm()); + + for (i = 0; i < num_loops; i++) + { + uint64_t result = 0; + asm __volatile__( + "li 7, 1;" + "mtspr %[tar], 7;" /* tar = 1 */ + "tbegin.;" + "beq 3f;" + "li 4, 0x7000;" /* Loop lots, to use time */ + "2:;" /* Start loop */ + "li 7, 2;" + "mtspr %[tar], 7;" /* tar = 2 */ + "tsuspend.;" + "li 7, 3;" + "mtspr %[tar], 7;" /* tar = 3 */ + "tresume.;" + "subi 4, 4, 1;" + "cmpdi 4, 0;" + "bne 2b;" + "tend.;" + + /* Transaction sucess! TAR should be 3 */ + "mfspr 7, %[tar];" + "ori %[res], 7, 4;" // res = 3|4 = 7 + "b 4f;" + + /* Abort handler. TAR should be rolled back to 1 */ + "3:;" + "mfspr 7, %[tar];" + "ori %[res], 7, 8;" // res = 1|8 = 9 + "4:;" + + : [res]"=r"(result) + : [tar]"i"(SPRN_TAR) + : "memory", "r0", "r4", "r7"); + + /* If result is anything else other than 7 or 9, the tar + * value must have been corrupted. */ + if ((result != 7) && (result != 9)) + return 1; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + /* A low number of iterations (eg 100) can cause a false pass */ + if (argc > 1) { + if (strcmp(argv[1], "-h") == 0) { + printf("Syntax:\n\t%s []\n", + argv[0]); + return 1; + } else { + num_loops = atoi(argv[1]); + } + } + + printf("Starting, %d loops\n", num_loops); + + return test_harness(test_tar, "tm_tar"); +} -- cgit From 16aab321872400a4ce35b90ba40484fcb5d636ba Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Wed, 23 Dec 2015 16:49:54 +1100 Subject: selftests/powerpc: Add test to check if TM SPRs are corrupted Testing that the TM SPRs are behaving the way they should. 
Uses more threads than cpus to see if the following register values persist with context switching: - the FS (failure summary) flag in TEXASR - TFIAR and TFHAR Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/.gitignore | 1 + tools/testing/selftests/powerpc/tm/Makefile | 3 +- tools/testing/selftests/powerpc/tm/tm-tmspr.c | 143 ++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/tm/tm-tmspr.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 2ac376410c70..bb942db845bf 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -5,3 +5,4 @@ tm-signal-stack tm-vmxcopy tm-fork tm-tar +tm-tmspr diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index cb4b3bf57a4d..d0505dbd22d5 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,4 +1,4 @@ -TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork tm-tar +TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork tm-tar tm-tmspr all: $(TEST_PROGS) @@ -6,6 +6,7 @@ $(TEST_PROGS): ../harness.c ../utils.c tm-syscall: tm-syscall-asm.S tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include +tm-tmspr: CFLAGS += -pthread include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c new file mode 100644 index 000000000000..2bda81c7bf23 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c @@ -0,0 +1,143 @@ +/* + * Copyright 2015, Michael Neuling, IBM Corp. + * Licensed under GPLv2. 
+ * + * Original: Michael Neuling 3/4/2014 + * Modified: Rashmica Gupta 8/12/2015 + * + * Check if any of the Transaction Memory SPRs get corrupted. + * - TFIAR - stores address of location of transaction failure + * - TFHAR - stores address of software failure handler (if transaction + * fails) + * - TEXASR - lots of info about the transacion(s) + * + * (1) create more threads than cpus + * (2) in each thread: + * (a) set TFIAR and TFHAR a unique value + * (b) loop for awhile, continually checking to see if + * either register has been corrupted. + * + * (3) Loop: + * (a) begin transaction + * (b) abort transaction + * (c) check TEXASR to see if FS has been corrupted + * + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tm.h" + +int num_loops = 10000; +int passed = 1; + +void tfiar_tfhar(void *in) +{ + int i, cpu; + unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + cpu = (unsigned long)in >> 1; + CPU_SET(cpu, &cpuset); + sched_setaffinity(0, sizeof(cpuset), &cpuset); + + /* TFIAR: Last bit has to be high so userspace can read register */ + tfiar = ((unsigned long)in) + 1; + tfiar += 2; + mtspr(SPRN_TFIAR, tfiar); + + /* TFHAR: Last two bits are reserved */ + tfhar = ((unsigned long)in); + tfhar &= ~0x3UL; + tfhar += 4; + mtspr(SPRN_TFHAR, tfhar); + + for (i = 0; i < num_loops; i++) { + tfhar_rd = mfspr(SPRN_TFHAR); + tfiar_rd = mfspr(SPRN_TFIAR); + if ( (tfhar != tfhar_rd) || (tfiar != tfiar_rd) ) { + passed = 0; + return; + } + } + return; +} + +void texasr(void *in) +{ + unsigned long i; + uint64_t result = 0; + + for (i = 0; i < num_loops; i++) { + asm __volatile__( + "tbegin.;" + "beq 3f ;" + "tabort. 
0 ;" + "tend.;" + + /* Abort handler */ + "3: ;" + ::: "memory"); + + /* Check the TEXASR */ + result = mfspr(SPRN_TEXASR); + if ((result & TEXASR_FS) == 0) { + passed = 0; + return; + } + } + return; +} + +int test_tmspr() +{ + pthread_t thread; + int thread_num; + unsigned long i; + + SKIP_IF(!have_htm()); + + /* To cause some context switching */ + thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN); + + /* Test TFIAR and TFHAR */ + for (i = 0 ; i < thread_num ; i += 2){ + if (pthread_create(&thread, NULL, (void*)tfiar_tfhar, (void *)i)) + return EXIT_FAILURE; + } + if (pthread_join(thread, NULL) != 0) + return EXIT_FAILURE; + + /* Test TEXASR */ + for (i = 0 ; i < thread_num ; i++){ + if (pthread_create(&thread, NULL, (void*)texasr, (void *)i)) + return EXIT_FAILURE; + } + if (pthread_join(thread, NULL) != 0) + return EXIT_FAILURE; + + if (passed) + return 0; + else + return 1; +} + +int main(int argc, char *argv[]) +{ + if (argc > 1) { + if (strcmp(argv[1], "-h") == 0) { + printf("Syntax:\t []\n"); + return 0; + } else { + num_loops = atoi(argv[1]); + } + } + return test_harness(test_tmspr, "tm_tmspr"); +} -- cgit From 0ce105bf9723e9a2dc7ec0a1e164c1b63aa64546 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 29 Mar 2016 09:35:29 +0100 Subject: selftests/seccomp: add MIPS self-test support This adds self-test support on MIPS, based on RFC patch from Kees Cook. Modifications from the RFC: - support the O32 syscall which passes the real syscall number in a0. - Use PTRACE_{GET,SET}REGS - Because SYSCALL_NUM and SYSCALL_RET are the same register, it is not possible to test modifying the syscall return value when skipping, since both would need to set the same register. Therefore modify that test case to just detect the skipped test. Tested on MIPS32r2 / MIPS64r2 with O32, N32 and N64 userlands. 
Signed-off-by: Matt Redfearn Acked-by: Kees Cook Cc: Andy Lutomirski Cc: Shuah Khan Cc: Will Drewry Cc: IMG-MIPSLinuxKerneldevelopers@imgtec.com Cc: linux-kernel@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/12977/ Signed-off-by: Ralf Baechle --- tools/testing/selftests/seccomp/seccomp_bpf.c | 30 +++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 150829dd7998..7947e568e057 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -5,6 +5,7 @@ * Test code for seccomp bpf. */ +#include #include #define __have_siginfo_t 1 #define __have_sigval_t 1 @@ -14,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -1242,6 +1242,12 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS s390_regs # define SYSCALL_NUM gprs[2] # define SYSCALL_RET gprs[2] +#elif defined(__mips__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM regs[2] +# define SYSCALL_SYSCALL_NUM regs[4] +# define SYSCALL_RET regs[2] +# define SYSCALL_NUM_RET_SHARE_REG #else # error "Do not know how to find your architecture's registers and syscalls" #endif @@ -1249,7 +1255,7 @@ TEST_F(TRACE_poke, getpid_runs_normally) /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). 
*/ -#if defined(__x86_64__) || defined(__i386__) +#if defined(__x86_64__) || defined(__i386__) || defined(__mips__) #define HAVE_GETREGS #endif @@ -1273,6 +1279,10 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) } #endif +#if defined(__mips__) + if (regs.SYSCALL_NUM == __NR_O32_Linux) + return regs.SYSCALL_SYSCALL_NUM; +#endif return regs.SYSCALL_NUM; } @@ -1297,6 +1307,13 @@ void change_syscall(struct __test_metadata *_metadata, { regs.SYSCALL_NUM = syscall; } +#elif defined(__mips__) + { + if (regs.SYSCALL_NUM == __NR_O32_Linux) + regs.SYSCALL_SYSCALL_NUM = syscall; + else + regs.SYSCALL_NUM = syscall; + } #elif defined(__arm__) # ifndef PTRACE_SET_SYSCALL @@ -1327,7 +1344,11 @@ void change_syscall(struct __test_metadata *_metadata, /* If syscall is skipped, change return value. */ if (syscall == -1) +#ifdef SYSCALL_NUM_RET_SHARE_REG + TH_LOG("Can't modify syscall return on this architecture"); +#else regs.SYSCALL_RET = 1; +#endif #ifdef HAVE_GETREGS ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs); @@ -1465,8 +1486,13 @@ TEST_F(TRACE_syscall, syscall_dropped) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); ASSERT_EQ(0, ret); +#ifdef SYSCALL_NUM_RET_SHARE_REG + /* gettid has been skipped */ + EXPECT_EQ(-1, syscall(__NR_gettid)); +#else /* gettid has been skipped and an altered return value stored. */ EXPECT_EQ(1, syscall(__NR_gettid)); +#endif EXPECT_NE(self->mytid, syscall(__NR_gettid)); } -- cgit From 5a614ec8a7cfe9098475fa1221b409fb7eec6054 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 10 May 2016 23:53:05 +0900 Subject: kselftests/ftrace: Detect tracefs mount point Currently ftracetest assumes tracing directory is located under $DEBUGFS/tracing. But it's possible to mount tracefs directly without debugfs.
Signed-off-by: Namhyung Kim Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/ftracetest | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index da48812ab95e..4c6a0bf8ba79 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -88,7 +88,12 @@ parse_opts() { # opts # Parameters DEBUGFS_DIR=`grep debugfs /proc/mounts | cut -f2 -d' ' | head -1` -TRACING_DIR=$DEBUGFS_DIR/tracing +if [ -z "$DEBUGFS_DIR" ]; then + TRACING_DIR=`grep tracefs /proc/mounts | cut -f2 -d' ' | head -1` +else + TRACING_DIR=$DEBUGFS_DIR/tracing +fi + TOP_DIR=`absdir $0` TEST_DIR=$TOP_DIR/test.d TEST_CASES=`find_testcases $TEST_DIR` @@ -102,7 +107,7 @@ parse_opts $* [ $DEBUG -ne 0 ] && set -x # Verify parameters -if [ -z "$DEBUGFS_DIR" -o ! -d "$TRACING_DIR" ]; then +if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then errexit "No ftrace directory found" fi -- cgit From 2c6c3946c3955f96ae0d48fdac940903918207d8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 10 May 2016 23:53:06 +0900 Subject: kselftests/ftrace: Add a test case for event pid filtering Check event is filtered by set_event_pid and options/event-fork. 
Signed-off-by: Namhyung Kim Signed-off-by: Shuah Khan --- .../selftests/ftrace/test.d/event/event-pid.tc | 72 ++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/event/event-pid.tc (limited to 'tools/testing') diff --git a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc new file mode 100644 index 000000000000..d4ab27b522f8 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc @@ -0,0 +1,72 @@ +#!/bin/sh +# description: event tracing - restricts events based on pid + +do_reset() { + echo > set_event + echo > set_event_pid + echo 0 > options/event-fork + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f set_event_pid ]; then + echo "event pid filtering is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo 1 > events/sched/sched_switch/enable + +yield + +count=`cat trace | grep sched_switch | wc -l` +if [ $count -eq 0 ]; then + fail "sched_switch events are not recorded" +fi + +do_reset + +read mypid rest < /proc/self/stat + +echo $mypid > set_event_pid +echo 'sched:sched_switch' > set_event + +yield + +count=`cat trace | grep sched_switch | grep -v "pid=$mypid" | wc -l` +if [ $count -ne 0 ]; then + fail "sched_switch events from other task are recorded" +fi + +do_reset + +echo $mypid > set_event_pid +echo 1 > options/event-fork +echo 1 > events/sched/sched_switch/enable + +yield + +count=`cat trace | grep sched_switch | grep -v "pid=$mypid" | wc -l` +if [ $count -eq 0 ]; then + fail "sched_switch events from other task are not recorded" +fi + +do_reset + +exit 0 -- cgit From 6eab37daf0ec1077fd612ff27ab513db20f33767 Mon Sep 17 00:00:00 2001 From: Muhammad Falak R Wani Date: Fri, 
13 May 2016 17:35:43 +0530 Subject: tools: testing: define the _GNU_SOURCE macro Add the macro _GNU_SOURCE, to fix CPU_ZERO and CPU_SET undefined compile errors. Signed-off-by: Muhammad Falak R Wani Signed-off-by: Shuah Khan --- tools/testing/selftests/intel_pstate/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh index bdaf37e92684..7868c106b8b1 100755 --- a/tools/testing/selftests/intel_pstate/run.sh +++ b/tools/testing/selftests/intel_pstate/run.sh @@ -32,7 +32,7 @@ EVALUATE_ONLY=0 max_cpus=$(($(nproc)-1)) # compile programs -gcc -o aperf aperf.c -lm +gcc aperf.c -Wall -D_GNU_SOURCE -o aperf -lm [ $? -ne 0 ] && echo "Problem compiling aperf.c." && exit 1 gcc -o msr msr.c -lm [ $? -ne 0 ] && echo "Problem compiling msr.c." && exit 1 -- cgit From f518b1607e128a8dcfa75f539864c1321c5a18ea Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:01:36 -0700 Subject: radix tree test suite: fix build Add an empty linux/init.h, and definitions for a few parts of the kernel API either in use now, or to be used in the near future. Start using the common definitions in tools/include/linux, although more work needs to be done here. 
Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/linux/kernel.h | 12 ++++++++++-- tools/testing/radix-tree/linux/slab.h | 1 - tools/testing/radix-tree/linux/types.h | 7 ++----- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index ae013b0160ac..6d0cdf618084 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -7,19 +7,25 @@ #include #include +#include "../../include/linux/compiler.h" + #ifndef NULL #define NULL 0 #endif #define BUG_ON(expr) assert(!(expr)) +#define WARN_ON(expr) assert(!(expr)) #define __init #define __must_check #define panic(expr) #define printk printf #define __force -#define likely(c) (c) -#define unlikely(c) (c) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#define pr_debug printk + +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define cpu_relax() barrier() #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -28,6 +34,8 @@ (type *)( (char *)__mptr - offsetof(type, member) );}) #define min(a, b) ((a) < (b) ? 
(a) : (b)) +#define cond_resched() sched_yield() + static inline int in_interrupt(void) { return 0; diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h index 57282506c21d..6d5a34770fd4 100644 --- a/tools/testing/radix-tree/linux/slab.h +++ b/tools/testing/radix-tree/linux/slab.h @@ -3,7 +3,6 @@ #include -#define GFP_KERNEL 1 #define SLAB_HWCACHE_ALIGN 1 #define SLAB_PANIC 2 #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ diff --git a/tools/testing/radix-tree/linux/types.h b/tools/testing/radix-tree/linux/types.h index 72a9d85f6c76..faa0b6ff9ca8 100644 --- a/tools/testing/radix-tree/linux/types.h +++ b/tools/testing/radix-tree/linux/types.h @@ -1,15 +1,13 @@ #ifndef _TYPES_H #define _TYPES_H +#include "../../include/linux/types.h" + #define __rcu #define __read_mostly #define BITS_PER_LONG (sizeof(long) * 8) -struct list_head { - struct list_head *next, *prev; -}; - static inline void INIT_LIST_HEAD(struct list_head *list) { list->next = list; @@ -22,7 +20,6 @@ typedef struct { #define uninitialized_var(x) x = x -typedef unsigned gfp_t; #include #endif -- cgit From d42cb1a9fffa9dc760c13302f00cdec25106e2f1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:01:39 -0700 Subject: radix tree test suite: add tests for radix_tree_locate_item() Fairly simple tests; add various items to the tree, then make sure we can find them again. Also check that a pointer that we know isn't in the tree is not found. 
Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/linux/kernel.h | 3 +++ tools/testing/radix-tree/main.c | 41 +++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 6d0cdf618084..76a88f35fdc4 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -9,6 +9,9 @@ #include "../../include/linux/compiler.h" +#define CONFIG_SHMEM +#define CONFIG_SWAP + #ifndef NULL #define NULL 0 #endif diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 0e83cad27a9f..71c5272443b1 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -232,10 +232,51 @@ void copy_tag_check(void) item_kill_tree(&tree); } +void __locate_check(struct radix_tree_root *tree, unsigned long index) +{ + struct item *item; + unsigned long index2; + + item_insert(tree, index); + item = item_lookup(tree, index); + index2 = radix_tree_locate_item(tree, item); + if (index != index2) { + printf("index %ld inserted; found %ld\n", + index, index2); + abort(); + } +} + +static void locate_check(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + unsigned long offset, index; + + for (offset = 0; offset < (1 << 3); offset++) { + for (index = 0; index < (1UL << 5); index++) { + __locate_check(&tree, index + offset); + } + if (radix_tree_locate_item(&tree, &tree) != -1) + abort(); + + item_kill_tree(&tree); + } + + if (radix_tree_locate_item(&tree, &tree) != -1) + abort(); + __locate_check(&tree, -1); + if (radix_tree_locate_item(&tree, &tree) != -1) + abort(); + item_kill_tree(&tree); +} + static void single_thread_tests(void) { int i; + printf("starting single_thread_tests: %d allocated\n", nr_allocated); + 
locate_check(); + printf("after locate_check: %d allocated\n", nr_allocated); tag_check(); printf("after tag_check: %d allocated\n", nr_allocated); gang_check(); -- cgit From 97d778b2de9213c7a7483dad0f533c1af9f0810f Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:01:42 -0700 Subject: radix tree test suite: allow testing other fan-out values The defines in regression2.c are already in radix-tree.h and duplicating them in the test case makes experimenting with other values for the fan-out harder than necessary. Allow the user of the radix tree to decide what the fan-out should be rather than fixing it to 8 for non-kernel uses. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 4 +--- tools/testing/radix-tree/linux/kernel.h | 2 ++ tools/testing/radix-tree/regression2.c | 7 ------- 3 files changed, 3 insertions(+), 10 deletions(-) (limited to 'tools/testing') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 83f708e5db59..5ce5a1e0ecc5 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -70,10 +70,8 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) #define RADIX_TREE_MAX_TAGS 3 -#ifdef __KERNEL__ +#ifndef RADIX_TREE_MAP_SHIFT #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 
4 : 6) -#else -#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ #endif #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 76a88f35fdc4..31fe2c77d7ae 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -12,6 +12,8 @@ #define CONFIG_SHMEM #define CONFIG_SWAP +#define RADIX_TREE_MAP_SHIFT 3 + #ifndef NULL #define NULL 0 #endif diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c index 5d2fa28cdca3..63bf347aaf33 100644 --- a/tools/testing/radix-tree/regression2.c +++ b/tools/testing/radix-tree/regression2.c @@ -51,13 +51,6 @@ #include "regression.h" -#ifdef __KERNEL__ -#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) -#else -#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ -#endif - -#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) #define PAGECACHE_TAG_DIRTY 0 #define PAGECACHE_TAG_WRITEBACK 1 #define PAGECACHE_TAG_TOWRITE 2 -- cgit From aa1d62d8530d5adf158dd633d360108466f93fcd Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:01:45 -0700 Subject: radix tree test suite: keep regression test runs short Currently the full suite of regression tests take upwards of 30 minutes to run on my development machine. The vast majority of this time is taken by the big_gang_check() and copy_tag_check() tests, which each run their tests through thousands of iterations...does this have value? Without big_gang_check() and copy_tag_check(), the test suite runs in around 15 seconds on my box. Honestly the first time I ever ran through the entire test suite was to gather the timings for this email - it simply takes too long to be useful on a normal basis. 
Instead, hide the excessive iterations through big_gang_check() and copy_tag_check() tests behind an '-l' flag (for "long run") in case they are still useful, but allow the regression test suite to complete in a reasonable amount of time. We still run each of these tests a few times (3 at present) to try and keep the test coverage. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/main.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 71c5272443b1..122c8b9be17e 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -61,11 +61,11 @@ void __big_gang_check(void) } while (!wrapped); } -void big_gang_check(void) +void big_gang_check(bool long_run) { int i; - for (i = 0; i < 1000; i++) { + for (i = 0; i < (long_run ? 1000 : 3); i++) { __big_gang_check(); srand(time(0)); printf("%d ", i); @@ -270,7 +270,7 @@ static void locate_check(void) item_kill_tree(&tree); } -static void single_thread_tests(void) +static void single_thread_tests(bool long_run) { int i; @@ -285,9 +285,9 @@ static void single_thread_tests(void) printf("after add_and_check: %d allocated\n", nr_allocated); dynamic_height_check(); printf("after dynamic_height_check: %d allocated\n", nr_allocated); - big_gang_check(); + big_gang_check(long_run); printf("after big_gang_check: %d allocated\n", nr_allocated); - for (i = 0; i < 2000; i++) { + for (i = 0; i < (long_run ? 
2000 : 3); i++) { copy_tag_check(); printf("%d ", i); fflush(stdout); @@ -295,15 +295,23 @@ static void single_thread_tests(void) printf("after copy_tag_check: %d allocated\n", nr_allocated); } -int main(void) +int main(int argc, char **argv) { + bool long_run = false; + int opt; + + while ((opt = getopt(argc, argv, "l")) != -1) { + if (opt == 'l') + long_run = true; + } + rcu_register_thread(); radix_tree_init(); regression1_test(); regression2_test(); regression3_test(); - single_thread_tests(); + single_thread_tests(long_run); sleep(1); printf("after sleep(1): %d allocated\n", nr_allocated); -- cgit From 7f308671c79899c1b4e275867d3647f64e896e78 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:01:48 -0700 Subject: radix tree test suite: rebuild when headers change When we make changes to radix-tree.h in the regular kernel source (include/linux/radix-tree.h), we really want our test code to be rebuilt. We also include a few other headers from tools/include and probably want to rebuild if these have been changed. Update the makefile so that all of our objects will be rebuilt when any of the headers we depend on are changed. 
Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 604212db9d4b..43febba864bd 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -13,7 +13,7 @@ main: $(OFILES) clean: $(RM) -f $(TARGETS) *.o radix-tree.c -$(OFILES): *.h */*.h +$(OFILES): *.h */*.h ../../../include/linux/radix-tree.h ../../include/linux/*.h radix-tree.c: ../../../lib/radix-tree.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ -- cgit From 57578c2ea2cb2e0d362a9212ac83cf90221d4883 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:01:54 -0700 Subject: radix-tree: introduce CONFIG_RADIX_TREE_MULTIORDER I've been receiving increasingly concerned notes from 0day about how much my recent changes have been bloating the radix tree. Make it happier by only including multiorder support if CONFIG_TRANSPARENT_HUGEPAGE is set. This is an independent Kconfig option, so other radix tree users can also set it if they have a need. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig | 3 +++ lib/radix-tree.c | 26 ++++++++++++++++++-------- mm/Kconfig | 1 + tools/testing/radix-tree/linux/kernel.h | 1 + 4 files changed, 23 insertions(+), 8 deletions(-) (limited to 'tools/testing') diff --git a/lib/Kconfig b/lib/Kconfig index 61d55bd0ed89..d79909dc01ec 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -362,6 +362,9 @@ config INTERVAL_TREE for more information.
+config RADIX_TREE_MULTIORDER + bool + config ASSOCIATIVE_ARRAY bool help diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 1624c4117961..799f341977d0 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -484,6 +484,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot = node->slots[offset]; } +#ifdef CONFIG_RADIX_TREE_MULTIORDER /* Insert pointers to the canonical entry */ if ((shift - order) > 0) { int i, n = 1 << (shift - order); @@ -499,6 +500,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, node->count++; } } +#endif if (nodep) *nodep = node; @@ -1469,6 +1471,20 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, return deleted; } +static inline void delete_sibling_entries(struct radix_tree_node *node, + void *ptr, unsigned offset) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + int i; + for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { + if (node->slots[offset + i] != ptr) + break; + node->slots[offset + i] = NULL; + node->count--; + } +#endif +} + /** * radix_tree_delete_item - delete an item from a radix tree * @root: radix tree root @@ -1484,7 +1500,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node; - unsigned int offset, i; + unsigned int offset; void **slot; void *entry; int tag; @@ -1513,13 +1529,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, radix_tree_tag_clear(root, index, tag); } - /* Delete any sibling slots pointing to this slot */ - for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { - if (node->slots[offset + i] != ptr_to_indirect(slot)) - break; - node->slots[offset + i] = NULL; - node->count--; - } + delete_sibling_entries(node, ptr_to_indirect(slot), offset); node->slots[offset] = NULL; node->count--; diff --git a/mm/Kconfig b/mm/Kconfig index 1a6a28ebcb8b..2664c118b5d2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -404,6 +404,7 @@ config TRANSPARENT_HUGEPAGE bool 
"Transparent Hugepage Support" depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE select COMPACTION + select RADIX_TREE_MULTIORDER help Transparent Hugepages allows the kernel to use huge pages and huge tlb transparently to the applications whenever possible. diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 31fe2c77d7ae..8ea0ed450810 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -9,6 +9,7 @@ #include "../../include/linux/compiler.h" +#define CONFIG_RADIX_TREE_MULTIORDER #define CONFIG_SHMEM #define CONFIG_SWAP -- cgit From 4f3755d1ae3cd856a5c7da3dea12cced8dc51fbf Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:14 -0700 Subject: radix tree test suite: start adding multiorder tests Test suite infrastructure for working with multiorder entries. The test itself is pretty basic: Add an entry, check that all expected indices return that entry and that indices around that entry don't return an entry. Then delete the entry and check no index returns that entry. Tests a few edge conditions including the multiorder entry at index 0 and at a higher index. Also tests deleting through an alias as well as through the canonical index. 
Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/Makefile | 2 +- tools/testing/radix-tree/main.c | 2 ++ tools/testing/radix-tree/multiorder.c | 58 +++++++++++++++++++++++++++++++++++ tools/testing/radix-tree/test.c | 13 ++++++-- tools/testing/radix-tree/test.h | 6 +++- 5 files changed, 76 insertions(+), 5 deletions(-) create mode 100644 tools/testing/radix-tree/multiorder.c (limited to 'tools/testing') diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 43febba864bd..3b530467148e 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -3,7 +3,7 @@ CFLAGS += -I. -g -Wall -D_LGPL_SOURCE LDFLAGS += -lpthread -lurcu TARGETS = main OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \ - regression1.o regression2.o regression3.o + regression1.o regression2.o regression3.o multiorder.o targets: $(TARGETS) diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 122c8b9be17e..b6a700b00cce 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -275,6 +275,8 @@ static void single_thread_tests(bool long_run) int i; printf("starting single_thread_tests: %d allocated\n", nr_allocated); + multiorder_checks(); + printf("after multiorder_check: %d allocated\n", nr_allocated); locate_check(); printf("after locate_check: %d allocated\n", nr_allocated); tag_check(); diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c new file mode 100644 index 000000000000..cfe718c78eb6 --- /dev/null +++ b/tools/testing/radix-tree/multiorder.c @@ -0,0 +1,58 @@ +/* + * multiorder.c: Multi-order radix tree entry testing + * Copyright (c) 2016 Intel Corporation + * Author: Ross Zwisler + * Author: Matthew Wilcox + * + * This program is free software; 
you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include +#include +#include + +#include "test.h" + +static void multiorder_check(unsigned long index, int order) +{ + unsigned long i; + unsigned long min = index & ~((1UL << order) - 1); + unsigned long max = min + (1UL << order); + RADIX_TREE(tree, GFP_KERNEL); + + printf("Multiorder index %ld, order %d\n", index, order); + + assert(item_insert_order(&tree, index, order) == 0); + + for (i = min; i < max; i++) { + struct item *item = item_lookup(&tree, i); + assert(item != 0); + assert(item->index == index); + } + for (i = 0; i < min; i++) + item_check_absent(&tree, i); + for (i = max; i < 2*max; i++) + item_check_absent(&tree, i); + + assert(item_delete(&tree, index) != 0); + + for (i = 0; i < 2*max; i++) + item_check_absent(&tree, i); +} + +void multiorder_checks(void) +{ + int i; + + for (i = 0; i < 20; i++) { + multiorder_check(200, i); + multiorder_check(0, i); + multiorder_check((1UL << i) + 1, i); + } +} diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 2bebf34cdc27..da54f11e8ba7 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -24,14 +24,21 @@ int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag) return radix_tree_tag_get(root, index, tag); } -int __item_insert(struct radix_tree_root *root, struct item *item) +int __item_insert(struct radix_tree_root *root, struct item *item, + unsigned order) { - return radix_tree_insert(root, item->index, item); + return __radix_tree_insert(root, item->index, order, item); } int item_insert(struct 
radix_tree_root *root, unsigned long index) { - return __item_insert(root, item_create(index)); + return __item_insert(root, item_create(index), 0); +} + +int item_insert_order(struct radix_tree_root *root, unsigned long index, + unsigned order) +{ + return __item_insert(root, item_create(index), order); } int item_delete(struct radix_tree_root *root, unsigned long index) diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 4e1d95faaa94..53cb595db44a 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -8,8 +8,11 @@ struct item { }; struct item *item_create(unsigned long index); -int __item_insert(struct radix_tree_root *root, struct item *item); +int __item_insert(struct radix_tree_root *root, struct item *item, + unsigned order); int item_insert(struct radix_tree_root *root, unsigned long index); +int item_insert_order(struct radix_tree_root *root, unsigned long index, + unsigned order); int item_delete(struct radix_tree_root *root, unsigned long index); struct item *item_lookup(struct radix_tree_root *root, unsigned long index); @@ -23,6 +26,7 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start, void item_kill_tree(struct radix_tree_root *root); void tag_check(void); +void multiorder_checks(void); struct item * item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); -- cgit From afe0e395b6d1817fa5393f1ad6fcbf71406b016d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:17 -0700 Subject: radix-tree: fix several shrinking bugs with multiorder entries Setting the indirect bit on the user data entry used to be unambiguous because the tree walking code knew not to expect internal nodes in the last level of the tree. Multiorder entries can appear at any level of the tree, and a leaf with the indirect bit set is indistinguishable from a pointer to a node. 
Introduce a special entry (RADIX_TREE_RETRY) which is neither a valid user entry, nor a valid pointer to a node. The radix_tree_deref_retry() function continues to work the same way, but tree walking code can distinguish it from a pointer to a node. Also fix the condition for setting slot->parent to NULL; it does not matter what height the tree is, it only matters whether slot is an indirect pointer. Move this code above the comment which is referring to the assignment to root->rnode. Also fix the condition for preventing the tree from shrinking to a single entry if it's a multiorder entry. Add a test-case to the test suite that checks that the tree goes back down to its original height after an item is inserted & deleted from a higher index in the tree. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 23 +++++++++++---------- tools/testing/radix-tree/multiorder.c | 39 +++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 11 deletions(-) (limited to 'tools/testing') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f13ddbba8ace..a1ba41730071 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -80,6 +80,8 @@ static inline void *indirect_to_ptr(void *ptr) return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } +#define RADIX_TREE_RETRY ptr_to_indirect(NULL) + #ifdef CONFIG_RADIX_TREE_MULTIORDER /* Sibling slots point directly to another slot in the same node */ static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node) @@ -1443,6 +1445,14 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) slot = to_free->slots[0]; if (!slot) break; + if (!radix_tree_is_indirect_ptr(slot) && (root->height > 1)) + break; + + if (radix_tree_is_indirect_ptr(slot)) { + slot = indirect_to_ptr(slot); + slot->parent = NULL; + slot = ptr_to_indirect(slot); + 
} /* * We don't need rcu_assign_pointer(), since we are simply @@ -1451,14 +1461,6 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * (to_free->slots[0]), it will be safe to dereference the new * one (root->rnode) as far as dependent read barriers go. */ - if (root->height > 1) { - if (!radix_tree_is_indirect_ptr(slot)) - break; - - slot = indirect_to_ptr(slot); - slot->parent = NULL; - slot = ptr_to_indirect(slot); - } root->rnode = slot; root->height--; @@ -1480,9 +1482,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * also results in a stale slot). So tag the slot as indirect * to force callers to retry. */ - if (root->height == 0) - *((unsigned long *)&to_free->slots[0]) |= - RADIX_TREE_INDIRECT_PTR; + if (!radix_tree_is_indirect_ptr(slot)) + to_free->slots[0] = RADIX_TREE_RETRY; radix_tree_node_free(to_free); } diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index cfe718c78eb6..71f34a047002 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -46,6 +46,41 @@ static void multiorder_check(unsigned long index, int order) item_check_absent(&tree, i); } +static void multiorder_shrink(unsigned long index, int order) +{ + unsigned long i; + unsigned long max = 1 << order; + RADIX_TREE(tree, GFP_KERNEL); + struct radix_tree_node *node; + + printf("Multiorder shrink index %ld, order %d\n", index, order); + + assert(item_insert_order(&tree, 0, order) == 0); + + node = tree.rnode; + + assert(item_insert(&tree, index) == 0); + assert(node != tree.rnode); + + assert(item_delete(&tree, index) != 0); + assert(node == tree.rnode); + + for (i = 0; i < max; i++) { + struct item *item = item_lookup(&tree, i); + assert(item != 0); + assert(item->index == 0); + } + for (i = max; i < 2*max; i++) + item_check_absent(&tree, i); + + if (!item_delete(&tree, 0)) { + printf("failed to delete index %ld (order %d)\n", index, order); abort(); + } + + for (i = 
0; i < 2*max; i++) + item_check_absent(&tree, i); +} + void multiorder_checks(void) { int i; @@ -55,4 +90,8 @@ void multiorder_checks(void) multiorder_check(0, i); multiorder_check((1UL << i) + 1, i); } + + for (i = 0; i < 15; i++) + multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i); + } -- cgit From 7b60e9ad59a31dd98c2f7ef841e2882c2b0e0f3b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:23 -0700 Subject: radix-tree: fix multiorder BUG_ON in radix_tree_insert These BUG_ON tests are to ensure that all the tags are clear when inserting a new entry. If we insert a multiorder entry, we'll end up looking at the tags for a different node, and so the BUG_ON can end up triggering spuriously. Also, we now have three tags, not two, so check all three are clear, and check all the root tags with a single call to BUG_ON since the bits are stored contiguously. Include a test-case to ensure this problem does not reoccur. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 14 ++++++++++---- tools/testing/radix-tree/multiorder.c | 12 ++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f14ada9830ca..ff460423ff4b 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -165,6 +165,11 @@ static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); } +static inline unsigned root_tags_get(struct radix_tree_root *root) +{ + return (__force unsigned)root->gfp_mask >> __GFP_BITS_SHIFT; +} + /* * Returns 1 if any slot in the node has this tag set. * Otherwise returns 0. 
@@ -604,12 +609,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, rcu_assign_pointer(*slot, item); if (node) { + unsigned offset = get_slot_offset(node, slot); node->count++; - BUG_ON(tag_get(node, 0, index & RADIX_TREE_MAP_MASK)); - BUG_ON(tag_get(node, 1, index & RADIX_TREE_MAP_MASK)); + BUG_ON(tag_get(node, 0, offset)); + BUG_ON(tag_get(node, 1, offset)); + BUG_ON(tag_get(node, 2, offset)); } else { - BUG_ON(root_tag_get(root, 0)); - BUG_ON(root_tag_get(root, 1)); + BUG_ON(root_tags_get(root)); } return 0; diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 71f34a047002..0a311a5f39de 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -81,6 +81,17 @@ static void multiorder_shrink(unsigned long index, int order) item_check_absent(&tree, i); } +static void multiorder_insert_bug(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + + item_insert(&tree, 0); + radix_tree_tag_set(&tree, 0, 0); + item_insert_order(&tree, 3 << 6, 6); + + item_kill_tree(&tree); +} + void multiorder_checks(void) { int i; @@ -94,4 +105,5 @@ void multiorder_checks(void) for (i = 0; i < 15; i++) multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i); + multiorder_insert_bug(); } -- cgit From 21ef533931f73a8e963a6107aa5ec51b192f28be Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:26 -0700 Subject: radix-tree: add support for multi-order iterating This enables the macros radix_tree_for_each_slot() and friends to be used with multi-order entries. The way that this works is that we treat all entries in a given slots[] array as a single chunk. If the index given to radix_tree_next_chunk() happens to point us to a sibling entry, we will back up iter->index so that it points to the canonical entry, and that will be the place where we start our iteration. 
As we're processing a chunk in radix_tree_next_slot(), we process canonical entries, skip over sibling entries, and restart the chunk lookup if we find a non-sibling indirect pointer. This drops back to the radix_tree_next_chunk() code, which will re-walk the tree and look for another chunk. This allows us to properly handle multi-order entries mixed with other entries that are at various heights in the radix tree. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 69 +++++++++++++++++++++++---- lib/radix-tree.c | 66 ++++++++++++++----------- tools/testing/radix-tree/generated/autoconf.h | 3 ++ tools/testing/radix-tree/linux/kernel.h | 5 +- 4 files changed, 102 insertions(+), 41 deletions(-) create mode 100644 tools/testing/radix-tree/generated/autoconf.h (limited to 'tools/testing') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index e1512a607709..8558d52e1c7b 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -330,8 +330,9 @@ static inline void radix_tree_preload_end(void) * struct radix_tree_iter - radix tree iterator state * * @index: index of current slot - * @next_index: next-to-last index for this chunk + * @next_index: one beyond the last index for this chunk * @tags: bit-mask for tag-iterating + * @shift: shift for the node that holds our slots * * This radix tree iterator works in terms of "chunks" of slots. A chunk is a * subinterval of slots contained within one radix tree leaf node. 
It is @@ -344,8 +345,20 @@ struct radix_tree_iter { unsigned long index; unsigned long next_index; unsigned long tags; +#ifdef CONFIG_RADIX_TREE_MULTIORDER + unsigned int shift; +#endif }; +static inline unsigned int iter_shift(struct radix_tree_iter *iter) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + return iter->shift; +#else + return 0; +#endif +} + #define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */ #define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */ #define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */ @@ -405,6 +418,12 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) return NULL; } +static inline unsigned long +__radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots) +{ + return iter->index + (slots << iter_shift(iter)); +} + /** * radix_tree_iter_next - resume iterating when the chunk may be invalid * @iter: iterator state @@ -416,7 +435,7 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) static inline __must_check void **radix_tree_iter_next(struct radix_tree_iter *iter) { - iter->next_index = iter->index + 1; + iter->next_index = __radix_tree_iter_add(iter, 1); iter->tags = 0; return NULL; } @@ -430,7 +449,12 @@ void **radix_tree_iter_next(struct radix_tree_iter *iter) static __always_inline long radix_tree_chunk_size(struct radix_tree_iter *iter) { - return iter->next_index - iter->index; + return (iter->next_index - iter->index) >> iter_shift(iter); +} + +static inline void *indirect_to_ptr(void *ptr) +{ + return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } /** @@ -448,24 +472,51 @@ static __always_inline void ** radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) { if (flags & RADIX_TREE_ITER_TAGGED) { + void *canon = slot; + iter->tags >>= 1; + if (unlikely(!iter->tags)) + return NULL; + while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && + radix_tree_is_indirect_ptr(slot[1])) { + if (indirect_to_ptr(slot[1]) == canon) { + iter->tags 
>>= 1; + iter->index = __radix_tree_iter_add(iter, 1); + slot++; + continue; + } + iter->next_index = __radix_tree_iter_add(iter, 1); + return NULL; + } if (likely(iter->tags & 1ul)) { - iter->index++; + iter->index = __radix_tree_iter_add(iter, 1); return slot + 1; } - if (!(flags & RADIX_TREE_ITER_CONTIG) && likely(iter->tags)) { + if (!(flags & RADIX_TREE_ITER_CONTIG)) { unsigned offset = __ffs(iter->tags); iter->tags >>= offset; - iter->index += offset + 1; + iter->index = __radix_tree_iter_add(iter, offset + 1); return slot + offset + 1; } } else { - long size = radix_tree_chunk_size(iter); + long count = radix_tree_chunk_size(iter); + void *canon = slot; - while (--size > 0) { + while (--count > 0) { slot++; - iter->index++; + iter->index = __radix_tree_iter_add(iter, 1); + + if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && + radix_tree_is_indirect_ptr(*slot)) { + if (indirect_to_ptr(*slot) == canon) + continue; + else { + iter->next_index = iter->index; + break; + } + } + if (likely(*slot)) return slot; if (flags & RADIX_TREE_ITER_CONTIG) { diff --git a/lib/radix-tree.c b/lib/radix-tree.c index ff460423ff4b..a4da86e40def 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -75,11 +75,6 @@ static inline void *ptr_to_indirect(void *ptr) return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR); } -static inline void *indirect_to_ptr(void *ptr) -{ - return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); -} - #define RADIX_TREE_RETRY ptr_to_indirect(NULL) #ifdef CONFIG_RADIX_TREE_MULTIORDER @@ -885,6 +880,14 @@ int radix_tree_tag_get(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_tag_get); +static inline void __set_iter_shift(struct radix_tree_iter *iter, + unsigned int shift) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + iter->shift = shift; +#endif +} + /** * radix_tree_next_chunk - find next chunk of slots for iteration * @@ -898,7 +901,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, { unsigned shift, tag = flags & 
RADIX_TREE_ITER_TAG_MASK; struct radix_tree_node *rnode, *node; - unsigned long index, offset, height; + unsigned long index, offset, maxindex; if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) return NULL; @@ -916,33 +919,39 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if (!index && iter->index) return NULL; - rnode = rcu_dereference_raw(root->rnode); + restart: + shift = radix_tree_load_root(root, &rnode, &maxindex); + if (index > maxindex) + return NULL; + if (radix_tree_is_indirect_ptr(rnode)) { rnode = indirect_to_ptr(rnode); - } else if (rnode && !index) { + } else if (rnode) { /* Single-slot tree */ - iter->index = 0; - iter->next_index = 1; + iter->index = index; + iter->next_index = maxindex + 1; iter->tags = 1; + __set_iter_shift(iter, shift); return (void **)&root->rnode; } else return NULL; -restart: - height = rnode->path & RADIX_TREE_HEIGHT_MASK; - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + shift -= RADIX_TREE_MAP_SHIFT; offset = index >> shift; - /* Index outside of the tree */ - if (offset >= RADIX_TREE_MAP_SIZE) - return NULL; - node = rnode; while (1) { struct radix_tree_node *slot; + unsigned new_off = radix_tree_descend(node, &slot, offset); + + if (new_off < offset) { + offset = new_off; + index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); + index |= offset << shift; + } + if ((flags & RADIX_TREE_ITER_TAGGED) ? 
- !test_bit(offset, node->tags[tag]) : - !node->slots[offset]) { + !tag_get(node, tag, offset) : !slot) { /* Hole detected */ if (flags & RADIX_TREE_ITER_CONTIG) return NULL; @@ -954,7 +963,10 @@ restart: offset + 1); else while (++offset < RADIX_TREE_MAP_SIZE) { - if (node->slots[offset]) + void *slot = node->slots[offset]; + if (is_sibling_entry(node, slot)) + continue; + if (slot) break; } index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); @@ -964,25 +976,23 @@ restart: return NULL; if (offset == RADIX_TREE_MAP_SIZE) goto restart; + slot = rcu_dereference_raw(node->slots[offset]); } - /* This is leaf-node */ - if (!shift) - break; - - slot = rcu_dereference_raw(node->slots[offset]); - if (slot == NULL) + if ((slot == NULL) || (slot == RADIX_TREE_RETRY)) goto restart; if (!radix_tree_is_indirect_ptr(slot)) break; + node = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } /* Update the iterator state */ - iter->index = index; - iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1; + iter->index = index & ~((1 << shift) - 1); + iter->next_index = (index | ((RADIX_TREE_MAP_SIZE << shift) - 1)) + 1; + __set_iter_shift(iter, shift); /* Construct iter->tags bit-mask from node->tags[tag] array */ if (flags & RADIX_TREE_ITER_TAGGED) { diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h new file mode 100644 index 000000000000..ad18cf5a2a3a --- /dev/null +++ b/tools/testing/radix-tree/generated/autoconf.h @@ -0,0 +1,3 @@ +#define CONFIG_RADIX_TREE_MULTIORDER 1 +#define CONFIG_SHMEM 1 +#define CONFIG_SWAP 1 diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 8ea0ed450810..be98a47b4e1b 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -8,10 +8,7 @@ #include #include "../../include/linux/compiler.h" - -#define CONFIG_RADIX_TREE_MULTIORDER -#define CONFIG_SHMEM -#define 
CONFIG_SWAP +#include "../../../include/linux/kconfig.h" #define RADIX_TREE_MAP_SHIFT 3 -- cgit From 643b57d0a9bd4c93625a2f5da4cebc3ceb402b9b Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:29 -0700 Subject: radix tree test suite: multi-order iteration test Add a unit test to verify that we can iterate over multi-order entries properly via a radix_tree_for_each_slot() loop. This was done with a single, somewhat complicated configuration that was meant to test many of the various corner cases having to do with multi-order entries: - An iteration could begin at a sibling entry, and we need to return the canonical entry. - We could have entries of various orders in the same slots[] array. - We could have multi-order entries at a nonzero height, followed by indirect pointers to more radix tree nodes later in that same slots[] array. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/multiorder.c | 92 +++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 0a311a5f39de..ba27fe0a579c 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -92,6 +92,96 @@ static void multiorder_insert_bug(void) item_kill_tree(&tree); } +void multiorder_iteration(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + struct radix_tree_iter iter; + void **slot; + int i, err; + + printf("Multiorder iteration test\n"); + +#define NUM_ENTRIES 11 + int index[NUM_ENTRIES] = {0, 2, 4, 8, 16, 32, 34, 36, 64, 72, 128}; + int order[NUM_ENTRIES] = {1, 1, 2, 3, 4, 1, 0, 1, 3, 0, 7}; + + for (i = 0; i < NUM_ENTRIES; i++) { + err = item_insert_order(&tree, index[i], order[i]); + assert(!err); + } + + i = 0; + /* start from index 1 to verify we 
find the multi-order entry at 0 */ + radix_tree_for_each_slot(slot, &tree, &iter, 1) { + int height = order[i] / RADIX_TREE_MAP_SHIFT; + int shift = height * RADIX_TREE_MAP_SHIFT; + + assert(iter.index == index[i]); + assert(iter.shift == shift); + i++; + } + + /* + * Now iterate through the tree starting at an elevated multi-order + * entry, beginning at an index in the middle of the range. + */ + i = 8; + radix_tree_for_each_slot(slot, &tree, &iter, 70) { + int height = order[i] / RADIX_TREE_MAP_SHIFT; + int shift = height * RADIX_TREE_MAP_SHIFT; + + assert(iter.index == index[i]); + assert(iter.shift == shift); + i++; + } + + item_kill_tree(&tree); +} + +void multiorder_tagged_iteration(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + struct radix_tree_iter iter; + void **slot; + int i; + + printf("Multiorder tagged iteration test\n"); + +#define MT_NUM_ENTRIES 9 + int index[MT_NUM_ENTRIES] = {0, 2, 4, 16, 32, 40, 64, 72, 128}; + int order[MT_NUM_ENTRIES] = {1, 0, 2, 4, 3, 1, 3, 0, 7}; + +#define TAG_ENTRIES 7 + int tag_index[TAG_ENTRIES] = {0, 4, 16, 40, 64, 72, 128}; + + for (i = 0; i < MT_NUM_ENTRIES; i++) + assert(!item_insert_order(&tree, index[i], order[i])); + + assert(!radix_tree_tagged(&tree, 1)); + + for (i = 0; i < TAG_ENTRIES; i++) + assert(radix_tree_tag_set(&tree, tag_index[i], 1)); + + i = 0; + /* start from index 1 to verify we find the multi-order entry at 0 */ + radix_tree_for_each_tagged(slot, &tree, &iter, 1, 1) { + assert(iter.index == tag_index[i]); + i++; + } + + /* + * Now iterate through the tree starting at an elevated multi-order + * entry, beginning at an index in the middle of the range. 
+ */ + i = 4; + radix_tree_for_each_slot(slot, &tree, &iter, 70) { + assert(iter.index == tag_index[i]); + i++; + } + + item_kill_tree(&tree); +} + void multiorder_checks(void) { int i; @@ -106,4 +196,6 @@ void multiorder_checks(void) multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i); multiorder_insert_bug(); + multiorder_iteration(); + multiorder_tagged_iteration(); } -- cgit From 0fc9b8ca2b1df4948e9516697b1cf12f030968bd Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:41 -0700 Subject: radix-tree test suite: add multi-order tag test Add a generic test for multi-order tag verification, and call it using several different configurations. This test creates a multi-order radix tree using the given index and order, and then sets, checks and clears tags using the indices covered by the single multi-order radix tree entry. With the various calls done by this test we verify root multi-order entries without siblings, multi-order entries without siblings in a radix tree node, as well as multi-order entries with siblings of various sizes. 
Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/multiorder.c | 97 +++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index ba27fe0a579c..1b6fc9b19930 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -19,6 +19,102 @@ #include "test.h" +#define for_each_index(i, base, order) \ + for (i = base; i < base + (1 << order); i++) + +static void __multiorder_tag_test(int index, int order) +{ + RADIX_TREE(tree, GFP_KERNEL); + int base, err, i; + + /* our canonical entry */ + base = index & ~((1 << order) - 1); + + printf("Multiorder tag test with index %d, canonical entry %d\n", + index, base); + + err = item_insert_order(&tree, index, order); + assert(!err); + + /* + * Verify we get collisions for covered indices. We try and fail to + * insert an exceptional entry so we don't leak memory via + * item_insert_order(). 
+ */ + for_each_index(i, base, order) { + err = __radix_tree_insert(&tree, i, order, + (void *)(0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY)); + assert(err == -EEXIST); + } + + for_each_index(i, base, order) { + assert(!radix_tree_tag_get(&tree, i, 0)); + assert(!radix_tree_tag_get(&tree, i, 1)); + } + + assert(radix_tree_tag_set(&tree, index, 0)); + + for_each_index(i, base, order) { + assert(radix_tree_tag_get(&tree, i, 0)); + assert(!radix_tree_tag_get(&tree, i, 1)); + } + + assert(radix_tree_tag_clear(&tree, index, 0)); + + for_each_index(i, base, order) { + assert(!radix_tree_tag_get(&tree, i, 0)); + assert(!radix_tree_tag_get(&tree, i, 1)); + } + + assert(!radix_tree_tagged(&tree, 0)); + assert(!radix_tree_tagged(&tree, 1)); + + item_kill_tree(&tree); +} + +static void multiorder_tag_tests(void) +{ + /* test multi-order entry for indices 0-7 with no sibling pointers */ + __multiorder_tag_test(0, 3); + __multiorder_tag_test(5, 3); + + /* test multi-order entry for indices 8-15 with no sibling pointers */ + __multiorder_tag_test(8, 3); + __multiorder_tag_test(15, 3); + + /* + * Our order 5 entry covers indices 0-31 in a tree with height=2. + * This is broken up as follows: + * 0-7: canonical entry + * 8-15: sibling 1 + * 16-23: sibling 2 + * 24-31: sibling 3 + */ + __multiorder_tag_test(0, 5); + __multiorder_tag_test(29, 5); + + /* same test, but with indices 32-63 */ + __multiorder_tag_test(32, 5); + __multiorder_tag_test(44, 5); + + /* + * Our order 8 entry covers indices 0-255 in a tree with height=3. 
+ * This is broken up as follows: + * 0-63: canonical entry + * 64-127: sibling 1 + * 128-191: sibling 2 + * 192-255: sibling 3 + */ + __multiorder_tag_test(0, 8); + __multiorder_tag_test(190, 8); + + /* same test, but with indices 256-511 */ + __multiorder_tag_test(256, 8); + __multiorder_tag_test(300, 8); + + __multiorder_tag_test(0x12345678UL, 8); +} + static void multiorder_check(unsigned long index, int order) { unsigned long i; @@ -196,6 +292,7 @@ void multiorder_checks(void) multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i); multiorder_insert_bug(); + multiorder_tag_tests(); multiorder_iteration(); multiorder_tagged_iteration(); } -- cgit From 8a14f4d8328cc8615f8a5487c4173f36a8314796 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:44 -0700 Subject: radix-tree: fix radix_tree_create for sibling entries If the radix tree user attempted to insert a colliding entry with an existing multiorder entry, then radix_tree_create() could encounter a sibling entry when walking down the tree to look for a slot. Use radix_tree_descend() to fix the problem, and add a test-case to make sure the problem doesn't come back in future. 
Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 4 ++-- tools/testing/radix-tree/multiorder.c | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b1ca74489bc2..9b5d8a963897 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -548,9 +548,9 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, /* Go a level down */ height--; shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; node = indirect_to_ptr(slot); - slot = node->slots[offset]; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + offset = radix_tree_descend(node, &slot, offset); } #ifdef CONFIG_RADIX_TREE_MULTIORDER diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 1b6fc9b19930..fc934578e1ef 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -135,6 +135,11 @@ static void multiorder_check(unsigned long index, int order) item_check_absent(&tree, i); for (i = max; i < 2*max; i++) item_check_absent(&tree, i); + for (i = min; i < max; i++) { + static void *entry = (void *) + (0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY); + assert(radix_tree_insert(&tree, i, entry) == -EEXIST); + } assert(item_delete(&tree, index) != 0); -- cgit From 0a2efc6c809b01872321d9c7e7d82d59ac6fde10 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:46 -0700 Subject: radix-tree: rewrite radix_tree_locate_item Use the new multi-order support functions to rewrite radix_tree_locate_item(). Modify the locate tests to test multiorder entries too. 
[hughd@google.com: radix_tree_locate_item() is often returning the wrong index] Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1605012108490.1166@eggly.anvils Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 87 ++++++++++++++++++++--------------------- tools/testing/radix-tree/main.c | 30 ++++++++------ 2 files changed, 61 insertions(+), 56 deletions(-) (limited to 'tools/testing') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9b5d8a963897..8329a2e950eb 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1303,58 +1303,54 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); #if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) #include <linux/sched.h> /* for cond_resched() */ +struct locate_info { + unsigned long found_index; + bool stop; +}; + /* * This linear search is at present only useful to shmem_unuse_inode(). */ static unsigned long __locate(struct radix_tree_node *slot, void *item, - unsigned long index, unsigned long *found_index) + unsigned long index, struct locate_info *info) { unsigned int shift, height; unsigned long i; height = slot->path & RADIX_TREE_HEIGHT_MASK; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; + shift = height * RADIX_TREE_MAP_SHIFT; - for ( ; height > 1; height--) { - i = (index >> shift) & RADIX_TREE_MAP_MASK; - for (;;) { - if (slot->slots[i] != NULL) - break; - index &= ~((1UL << shift) - 1); - index += 1UL << shift; - if (index == 0) - goto out; /* 32-bit wraparound */ - i++; - if (i == RADIX_TREE_MAP_SIZE) - goto out; - } + do { + shift -= RADIX_TREE_MAP_SHIFT; - slot = rcu_dereference_raw(slot->slots[i]); - if (slot == NULL) - goto out; - if (!radix_tree_is_indirect_ptr(slot)) { - if (slot == item) { - *found_index = index + i; - index = 0; - } else { - index += shift; + for (i = (index >> shift) & RADIX_TREE_MAP_MASK; + i < RADIX_TREE_MAP_SIZE; + i++, index += (1UL << shift)) 
{ + struct radix_tree_node *node = + rcu_dereference_raw(slot->slots[i]); + if (node == RADIX_TREE_RETRY) + goto out; + if (!radix_tree_is_indirect_ptr(node)) { + if (node == item) { + info->found_index = index; + info->stop = true; + goto out; + } + continue; } - goto out; + node = indirect_to_ptr(node); + if (is_sibling_entry(slot, node)) + continue; + slot = node; + break; } - slot = indirect_to_ptr(slot); - shift -= RADIX_TREE_MAP_SHIFT; - } + if (i == RADIX_TREE_MAP_SIZE) + break; + } while (shift); - /* Bottom level: check items */ - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { - if (slot->slots[i] == item) { - *found_index = index + i; - index = 0; - goto out; - } - } - index += RADIX_TREE_MAP_SIZE; out: + if ((index == 0) && (i == RADIX_TREE_MAP_SIZE)) + info->stop = true; return index; } @@ -1372,7 +1368,10 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) struct radix_tree_node *node; unsigned long max_index; unsigned long cur_index = 0; - unsigned long found_index = -1; + struct locate_info info = { + .found_index = -1, + .stop = false, + }; do { rcu_read_lock(); @@ -1380,24 +1379,24 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) if (!radix_tree_is_indirect_ptr(node)) { rcu_read_unlock(); if (node == item) - found_index = 0; + info.found_index = 0; break; } node = indirect_to_ptr(node); - max_index = radix_tree_maxindex(node->path & - RADIX_TREE_HEIGHT_MASK); + + max_index = node_maxindex(node); if (cur_index > max_index) { rcu_read_unlock(); break; } - cur_index = __locate(node, item, cur_index, &found_index); + cur_index = __locate(node, item, cur_index, &info); rcu_read_unlock(); cond_resched(); - } while (cur_index != 0 && cur_index <= max_index); + } while (!info.stop && cur_index <= max_index); - return found_index; + return info.found_index; } #else unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) diff --git a/tools/testing/radix-tree/main.c 
b/tools/testing/radix-tree/main.c index b6a700b00cce..65231e9ba3e8 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -232,17 +232,18 @@ void copy_tag_check(void) item_kill_tree(&tree); } -void __locate_check(struct radix_tree_root *tree, unsigned long index) +void __locate_check(struct radix_tree_root *tree, unsigned long index, + unsigned order) { struct item *item; unsigned long index2; - item_insert(tree, index); + item_insert_order(tree, index, order); item = item_lookup(tree, index); index2 = radix_tree_locate_item(tree, item); if (index != index2) { - printf("index %ld inserted; found %ld\n", - index, index2); + printf("index %ld order %d inserted; found %ld\n", + index, order, index2); abort(); } } @@ -250,21 +251,26 @@ void __locate_check(struct radix_tree_root *tree, unsigned long index) static void locate_check(void) { RADIX_TREE(tree, GFP_KERNEL); + unsigned order; unsigned long offset, index; - for (offset = 0; offset < (1 << 3); offset++) { - for (index = 0; index < (1UL << 5); index++) { - __locate_check(&tree, index + offset); - } - if (radix_tree_locate_item(&tree, &tree) != -1) - abort(); + for (order = 0; order < 20; order++) { + for (offset = 0; offset < (1 << (order + 3)); + offset += (1UL << order)) { + for (index = 0; index < (1UL << (order + 5)); + index += (1UL << order)) { + __locate_check(&tree, index + offset, order); + } + if (radix_tree_locate_item(&tree, &tree) != -1) + abort(); - item_kill_tree(&tree); + item_kill_tree(&tree); + } } if (radix_tree_locate_item(&tree, &tree) != -1) abort(); - __locate_check(&tree, -1); + __locate_check(&tree, -1, 0); if (radix_tree_locate_item(&tree, &tree) != -1) abort(); item_kill_tree(&tree); -- cgit From eb73f7f3300c144c4b886dd56ea4c3d2b2d58249 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:49 -0700 Subject: radix-tree: add test for radix_tree_locate_item() Add a unit test that provides coverage for the bug fixed in the commit entitled 
"radix-tree: rewrite radix_tree_locate_item fix" from Hugh Dickins. I've verified that this test fails before his patch due to miscalculated 'index' values in __locate() in lib/radix-tree.c, and passes with his fix. Link: http://lkml.kernel.org/r/1462307263-20623-1-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/linux/init.h | 1 + tools/testing/radix-tree/main.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 tools/testing/radix-tree/linux/init.h (limited to 'tools/testing') diff --git a/tools/testing/radix-tree/linux/init.h b/tools/testing/radix-tree/linux/init.h new file mode 100644 index 000000000000..360cabb3c4e7 --- /dev/null +++ b/tools/testing/radix-tree/linux/init.h @@ -0,0 +1 @@ +/* An empty file stub that allows radix-tree.c to compile. */ diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 65231e9ba3e8..b7619ff3b552 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -232,7 +232,7 @@ void copy_tag_check(void) item_kill_tree(&tree); } -void __locate_check(struct radix_tree_root *tree, unsigned long index, +static void __locate_check(struct radix_tree_root *tree, unsigned long index, unsigned order) { struct item *item; @@ -248,12 +248,25 @@ void __locate_check(struct radix_tree_root *tree, unsigned long index, } } +static void __order_0_locate_check(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + int i; + + for (i = 0; i < 50; i++) + __locate_check(&tree, rand() % INT_MAX, 0); + + item_kill_tree(&tree); +} + static void locate_check(void) { RADIX_TREE(tree, GFP_KERNEL); unsigned order; unsigned long offset, index; + __order_0_locate_check(); + for (order = 0; order < 20; order++) { for (offset = 0; offset < (1 << (order + 3)); offset += (1UL << order)) { -- cgit From 070c5ac2740b5db89d381a09fb03b2480b2f7a74 Mon Sep 17 00:00:00 
2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:52 -0700 Subject: radix-tree: fix radix_tree_range_tag_if_tagged() for multiorder entries I had previously decided that tagging a single multiorder entry would count as tagging 2^order entries for the purposes of 'nr_to_tag'. I now believe that decision to be a mistake, and it should count as a single entry. That's more likely to be what callers expect. When walking back up the tree from a newly-tagged entry, the current code assumed we were starting from the lowest level of the tree; if we have a multiorder entry with an order at least RADIX_TREE_MAP_SHIFT in size then we need to shift the index by 'shift' before we start walking back up the tree, or we will end up not setting tags on higher entries, and then mistakenly thinking that entries below a certain point in the tree are not tagged. If the first index we examine is a sibling entry of a tagged multiorder entry, we were not tagging it. We need to examine the canonical entry, and the easiest way to do that is to use radix_tree_descend(). We then have to skip over sibling slots when looking for the next entry in the tree or we will end up walking back to the canonical entry. Add several tests for radix_tree_range_tag_if_tagged(). 
Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 76 +++++++++++++++-------------------- tools/testing/radix-tree/multiorder.c | 25 +++++++++++- tools/testing/radix-tree/tag_check.c | 10 +++++ 3 files changed, 67 insertions(+), 44 deletions(-) (limited to 'tools/testing') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8329a2e950eb..8df0df2835b4 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1033,14 +1033,13 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned long nr_to_tag, unsigned int iftag, unsigned int settag) { - unsigned int height = root->height; - struct radix_tree_node *node = NULL; - struct radix_tree_node *slot; - unsigned int shift; + struct radix_tree_node *slot, *node = NULL; + unsigned long maxindex; + unsigned int shift = radix_tree_load_root(root, &slot, &maxindex); unsigned long tagged = 0; unsigned long index = *first_indexp; - last_index = min(last_index, radix_tree_maxindex(height)); + last_index = min(last_index, maxindex); if (index > last_index) return 0; if (!nr_to_tag) @@ -1049,80 +1048,71 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, *first_indexp = last_index + 1; return 0; } - if (height == 0) { + if (!radix_tree_is_indirect_ptr(slot)) { *first_indexp = last_index + 1; root_tag_set(root, settag); return 1; } - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = indirect_to_ptr(root->rnode); + node = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; for (;;) { unsigned long upindex; - int offset; + unsigned offset; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - if (!slot->slots[offset]) + offset = radix_tree_descend(node, &slot, offset); + if (!slot) goto next; - if (!tag_get(slot, iftag, offset)) + if (!tag_get(node, iftag, offset)) goto next; - if (shift) { - node = slot; - slot = 
slot->slots[offset]; - if (radix_tree_is_indirect_ptr(slot)) { - slot = indirect_to_ptr(slot); - shift -= RADIX_TREE_MAP_SHIFT; - continue; - } else { - slot = node; - node = node->parent; - } + /* Sibling slots never have tags set on them */ + if (radix_tree_is_indirect_ptr(slot)) { + node = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; + continue; } /* tag the leaf */ - tagged += 1 << shift; - tag_set(slot, settag, offset); + tagged++; + tag_set(node, settag, offset); + slot = node->parent; /* walk back up the path tagging interior nodes */ - upindex = index; - while (node) { + upindex = index >> shift; + while (slot) { upindex >>= RADIX_TREE_MAP_SHIFT; offset = upindex & RADIX_TREE_MAP_MASK; /* stop if we find a node with the tag already set */ - if (tag_get(node, settag, offset)) + if (tag_get(slot, settag, offset)) break; - tag_set(node, settag, offset); - node = node->parent; + tag_set(slot, settag, offset); + slot = slot->parent; } - /* - * Small optimization: now clear that node pointer. - * Since all of this slot's ancestors now have the tag set - * from setting it above, we have no further need to walk - * back up the tree setting tags, until we update slot to - * point to another radix_tree_node. - */ - node = NULL; - -next: + next: /* Go to next item at level determined by 'shift' */ index = ((index >> shift) + 1) << shift; /* Overflow can happen when last_index is ~0UL... */ if (index > last_index || !index) break; - if (tagged >= nr_to_tag) - break; - while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) { + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + while (offset == 0) { /* * We've fully scanned this node. Go up. Because * last_index is guaranteed to be in the tree, what * we do below cannot wander astray. 
*/ - slot = slot->parent; + node = node->parent; shift += RADIX_TREE_MAP_SHIFT; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; } + if (is_sibling_entry(node, node->slots[offset])) + goto next; + if (tagged >= nr_to_tag) + break; } /* * We need not to tag the root tag if there is no tag which is set with diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index fc934578e1ef..c061f4bd6c05 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -26,6 +26,7 @@ static void __multiorder_tag_test(int index, int order) { RADIX_TREE(tree, GFP_KERNEL); int base, err, i; + unsigned long first = 0; /* our canonical entry */ base = index & ~((1 << order) - 1); @@ -59,13 +60,16 @@ static void __multiorder_tag_test(int index, int order) assert(!radix_tree_tag_get(&tree, i, 1)); } + assert(radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, 10, 0, 1) == 1); assert(radix_tree_tag_clear(&tree, index, 0)); for_each_index(i, base, order) { assert(!radix_tree_tag_get(&tree, i, 0)); - assert(!radix_tree_tag_get(&tree, i, 1)); + assert(radix_tree_tag_get(&tree, i, 1)); } + assert(radix_tree_tag_clear(&tree, index, 1)); + assert(!radix_tree_tagged(&tree, 0)); assert(!radix_tree_tagged(&tree, 1)); @@ -244,6 +248,7 @@ void multiorder_tagged_iteration(void) RADIX_TREE(tree, GFP_KERNEL); struct radix_tree_iter iter; void **slot; + unsigned long first = 0; int i; printf("Multiorder tagged iteration test\n"); @@ -280,6 +285,24 @@ void multiorder_tagged_iteration(void) i++; } + radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, + MT_NUM_ENTRIES, 1, 2); + + i = 0; + radix_tree_for_each_tagged(slot, &tree, &iter, 1, 2) { + assert(iter.index == tag_index[i]); + i++; + } + + first = 1; + radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, + MT_NUM_ENTRIES, 1, 0); + i = 0; + radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) { + assert(iter.index == tag_index[i]); + i++; + } + item_kill_tree(&tree); } diff --git 
a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index 83136be552a0..b7447ceb75e9 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -12,6 +12,7 @@ static void __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag) { + unsigned long first = 0; int ret; item_check_absent(tree, index); @@ -22,6 +23,10 @@ __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag) item_tag_set(tree, index, tag); ret = item_tag_get(tree, index, tag); assert(ret != 0); + ret = radix_tree_range_tag_if_tagged(tree, &first, ~0UL, 10, tag, !tag); + assert(ret == 1); + ret = item_tag_get(tree, index, !tag); + assert(ret != 0); ret = item_delete(tree, index); assert(ret != 0); item_insert(tree, index); @@ -304,6 +309,7 @@ static void single_check(void) struct item *items[BATCH]; RADIX_TREE(tree, GFP_KERNEL); int ret; + unsigned long first = 0; item_insert(&tree, 0); item_tag_set(&tree, 0, 0); @@ -313,6 +319,10 @@ static void single_check(void) assert(ret == 0); verify_tag_consistency(&tree, 0); verify_tag_consistency(&tree, 1); + ret = radix_tree_range_tag_if_tagged(&tree, &first, 10, 10, 0, 1); + assert(ret == 1); + ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1); + assert(ret == 1); item_kill_tree(&tree); } -- cgit From 0796c58325533f87c00949a545eb607baa8441cb Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:55 -0700 Subject: radix-tree: fix radix_tree_dump() for multi-order entries - Print which indices are covered by every leaf entry - Print sibling entries - Print the node pointer instead of the slot entry - Build by default in userspace, and make it accessible to the test-suite Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 48 +++++++++++++++++++++++++---------------- 
tools/testing/radix-tree/test.h | 1 + 2 files changed, 30 insertions(+), 19 deletions(-) (limited to 'tools/testing') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8df0df2835b4..a1a44f94c171 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -215,27 +215,36 @@ radix_tree_find_next_bit(const unsigned long *addr, return size; } -#if 0 -static void dump_node(void *slot, int height, int offset) +#ifndef __KERNEL__ +static void dump_node(struct radix_tree_node *node, unsigned offset, + unsigned shift, unsigned long index) { - struct radix_tree_node *node; - int i; - - if (!slot) - return; - - if (height == 0) { - pr_debug("radix entry %p offset %d\n", slot, offset); - return; - } + unsigned long i; - node = indirect_to_ptr(slot); pr_debug("radix node: %p offset %d tags %lx %lx %lx path %x count %d parent %p\n", - slot, offset, node->tags[0][0], node->tags[1][0], - node->tags[2][0], node->path, node->count, node->parent); - - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) - dump_node(node->slots[i], height - 1, i); + node, offset, + node->tags[0][0], node->tags[1][0], node->tags[2][0], + node->path, node->count, node->parent); + + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { + unsigned long first = index | (i << shift); + unsigned long last = first | ((1UL << shift) - 1); + void *entry = node->slots[i]; + if (!entry) + continue; + if (is_sibling_entry(node, entry)) { + pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n", + entry, i, + *(void **)indirect_to_ptr(entry), + first, last); + } else if (!radix_tree_is_indirect_ptr(entry)) { + pr_debug("radix entry %p offset %ld indices %ld-%ld\n", + entry, i, first, last); + } else { + dump_node(indirect_to_ptr(entry), i, + shift - RADIX_TREE_MAP_SHIFT, first); + } + } } /* For debug */ @@ -246,7 +255,8 @@ static void radix_tree_dump(struct radix_tree_root *root) root->gfp_mask >> __GFP_BITS_SHIFT); if (!radix_tree_is_indirect_ptr(root->rnode)) return; - dump_node(root->rnode, root->height, 0); + 
dump_node(indirect_to_ptr(root->rnode), 0, + (root->height - 1) * RADIX_TREE_MAP_SHIFT, 0); } #endif diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 53cb595db44a..67217c93fe95 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -40,5 +40,6 @@ extern int nr_allocated; /* Normally private parts of lib/radix-tree.c */ void *indirect_to_ptr(void *ptr); +void radix_tree_dump(struct radix_tree_root *root); int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long radix_tree_maxindex(unsigned int height); -- cgit From 0694f0c9e20c47063e4237e5f6649ae5ce5a369a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:16 -0700 Subject: radix tree test suite: remove dependencies on height verify_node() can use node->shift instead of the height. tree_verify_min_height() can be converted over to using node_maxindex() and shift_maxindex() instead of radix_tree_maxindex(). Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/test.c | 34 +++++++++++++++++++++++----------- tools/testing/radix-tree/test.h | 3 ++- 2 files changed, 25 insertions(+), 12 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index da54f11e8ba7..3004c58b9021 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -143,7 +143,7 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start, } static int verify_node(struct radix_tree_node *slot, unsigned int tag, - unsigned int height, int tagged) + int tagged) { int anyset = 0; int i; @@ -159,7 +159,8 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, } } if (tagged != anyset) { - printf("tag: %u, height %u, tagged: %d, anyset: %d\n", tag, height, tagged, anyset); + 
printf("tag: %u, shift %u, tagged: %d, anyset: %d\n", + tag, slot->shift, tagged, anyset); for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) { printf("tag %d: ", j); for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) @@ -171,10 +172,10 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, assert(tagged == anyset); /* Go for next level */ - if (height > 1) { + if (slot->shift > 0) { for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) if (slot->slots[i]) - if (verify_node(slot->slots[i], tag, height - 1, + if (verify_node(slot->slots[i], tag, !!test_bit(i, slot->tags[tag]))) { printf("Failure at off %d\n", i); for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) { @@ -191,9 +192,10 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag) { - if (!root->height) + struct radix_tree_node *node = root->rnode; + if (!radix_tree_is_indirect_ptr(node)) return; - verify_node(root->rnode, tag, root->height, !!root_tag_get(root, tag)); + verify_node(node, tag, !!root_tag_get(root, tag)); } void item_kill_tree(struct radix_tree_root *root) @@ -218,9 +220,19 @@ void item_kill_tree(struct radix_tree_root *root) void tree_verify_min_height(struct radix_tree_root *root, int maxindex) { - assert(radix_tree_maxindex(root->height) >= maxindex); - if (root->height > 1) - assert(radix_tree_maxindex(root->height-1) < maxindex); - else if (root->height == 1) - assert(radix_tree_maxindex(root->height-1) <= maxindex); + unsigned shift; + struct radix_tree_node *node = root->rnode; + if (!radix_tree_is_indirect_ptr(node)) { + assert(maxindex == 0); + return; + } + + node = indirect_to_ptr(node); + assert(maxindex <= node_maxindex(node)); + + shift = node->shift; + if (shift > 0) + assert(maxindex > shift_maxindex(shift - RADIX_TREE_MAP_SHIFT)); + else + assert(maxindex > 0); } diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 67217c93fe95..866c8c676aa4 100644 --- 
a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -42,4 +42,5 @@ extern int nr_allocated; void *indirect_to_ptr(void *ptr); void radix_tree_dump(struct radix_tree_root *root); int root_tag_get(struct radix_tree_root *root, unsigned int tag); -unsigned long radix_tree_maxindex(unsigned int height); +unsigned long node_maxindex(struct radix_tree_node *); +unsigned long shift_maxindex(unsigned int shift); -- cgit From 4dd6c0987ca43d6544f4f0a3f86f6ea3bfc60fc1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:27 -0700 Subject: radix-tree: rename indirect_to_ptr() to entry_to_node() Mirrors the earlier commit introducing node_to_entry(). Also change the type returned to be a struct radix_tree_node pointer. That lets us simplify a couple of places in the radix tree shrink & extend paths where we could convert an entry into a pointer, modify the node, then convert the pointer back into an entry. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 12 +++++------ lib/radix-tree.c | 48 ++++++++++++++++++----------------------- tools/testing/radix-tree/test.c | 4 ++-- tools/testing/radix-tree/test.h | 1 - 4 files changed, 28 insertions(+), 37 deletions(-) (limited to 'tools/testing') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c8cc879046c7..b94aa198dd6b 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -442,7 +442,7 @@ radix_tree_chunk_size(struct radix_tree_iter *iter) return (iter->next_index - iter->index) >> iter_shift(iter); } -static inline void *indirect_to_ptr(void *ptr) +static inline struct radix_tree_node *entry_to_node(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); } @@ -469,7 +469,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) return 
NULL; while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && radix_tree_is_indirect_ptr(slot[1])) { - if (indirect_to_ptr(slot[1]) == canon) { + if (entry_to_node(slot[1]) == canon) { iter->tags >>= 1; iter->index = __radix_tree_iter_add(iter, 1); slot++; @@ -499,12 +499,10 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && radix_tree_is_indirect_ptr(*slot)) { - if (indirect_to_ptr(*slot) == canon) + if (entry_to_node(*slot) == canon) continue; - else { - iter->next_index = iter->index; - break; - } + iter->next_index = iter->index; + break; } if (likely(*slot)) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f66bb3932452..3c3fdd9c5bb3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -230,13 +230,13 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) if (is_sibling_entry(node, entry)) { pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n", entry, i, - *(void **)indirect_to_ptr(entry), + *(void **)entry_to_node(entry), first, last); } else if (!radix_tree_is_indirect_ptr(entry)) { pr_debug("radix entry %p offset %ld indices %ld-%ld\n", entry, i, first, last); } else { - dump_node(indirect_to_ptr(entry), first); + dump_node(entry_to_node(entry), first); } } } @@ -249,7 +249,7 @@ static void radix_tree_dump(struct radix_tree_root *root) root->gfp_mask >> __GFP_BITS_SHIFT); if (!radix_tree_is_indirect_ptr(root->rnode)) return; - dump_node(indirect_to_ptr(root->rnode), 0); + dump_node(entry_to_node(root->rnode), 0); } #endif @@ -422,7 +422,7 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root, *nodep = node; if (likely(radix_tree_is_indirect_ptr(node))) { - node = indirect_to_ptr(node); + node = entry_to_node(node); *maxindex = node_maxindex(node); return node->shift + RADIX_TREE_MAP_SHIFT; } @@ -467,11 +467,8 @@ static int radix_tree_extend(struct radix_tree_root *root, node->offset = 0; node->count = 1; node->parent = NULL; - if 
(radix_tree_is_indirect_ptr(slot)) { - slot = indirect_to_ptr(slot); - slot->parent = node; - slot = node_to_entry(slot); - } + if (radix_tree_is_indirect_ptr(slot)) + entry_to_node(slot)->parent = node; node->slots[0] = slot; slot = node_to_entry(node); rcu_assign_pointer(root->rnode, slot); @@ -542,7 +539,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, break; /* Go a level down */ - node = indirect_to_ptr(slot); + node = entry_to_node(slot); offset = (index >> shift) & RADIX_TREE_MAP_MASK; offset = radix_tree_descend(node, &slot, offset); } @@ -645,7 +642,7 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, if (node == RADIX_TREE_RETRY) goto restart; - parent = indirect_to_ptr(node); + parent = entry_to_node(node); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; offset = radix_tree_descend(parent, &node, offset); @@ -729,7 +726,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - parent = indirect_to_ptr(node); + parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, offset); BUG_ON(!node); @@ -777,7 +774,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - parent = indirect_to_ptr(node); + parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, offset); } @@ -844,7 +841,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - parent = indirect_to_ptr(node); + parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, offset); if (!node) @@ -904,7 +901,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, return NULL; if (radix_tree_is_indirect_ptr(rnode)) { - rnode = indirect_to_ptr(rnode); + rnode = entry_to_node(rnode); } else if (rnode) { /* 
Single-slot tree */ iter->index = index; @@ -963,7 +960,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if (!radix_tree_is_indirect_ptr(slot)) break; - node = indirect_to_ptr(slot); + node = entry_to_node(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } @@ -1048,7 +1045,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, return 1; } - node = indirect_to_ptr(slot); + node = entry_to_node(slot); shift -= RADIX_TREE_MAP_SHIFT; for (;;) { @@ -1063,7 +1060,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, goto next; /* Sibling slots never have tags set on them */ if (radix_tree_is_indirect_ptr(slot)) { - node = indirect_to_ptr(slot); + node = entry_to_node(slot); shift -= RADIX_TREE_MAP_SHIFT; continue; } @@ -1322,7 +1319,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, } continue; } - node = indirect_to_ptr(node); + node = entry_to_node(node); if (is_sibling_entry(slot, node)) continue; slot = node; @@ -1367,7 +1364,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) break; } - node = indirect_to_ptr(node); + node = entry_to_node(node); max_index = node_maxindex(node); if (cur_index > max_index) { @@ -1403,7 +1400,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) if (!radix_tree_is_indirect_ptr(to_free)) break; - to_free = indirect_to_ptr(to_free); + to_free = entry_to_node(to_free); /* * The candidate node has more than one child, or its child @@ -1418,11 +1415,8 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) if (!radix_tree_is_indirect_ptr(slot) && to_free->shift) break; - if (radix_tree_is_indirect_ptr(slot)) { - slot = indirect_to_ptr(slot); - slot->parent = NULL; - slot = node_to_entry(slot); - } + if (radix_tree_is_indirect_ptr(slot)) + entry_to_node(slot)->parent = NULL; /* * We don't need rcu_assign_pointer(), since we are simply @@ -1481,7 
+1475,7 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, struct radix_tree_node *parent; if (node->count) { - if (node == indirect_to_ptr(root->rnode)) + if (node == entry_to_node(root->rnode)) deleted |= radix_tree_shrink(root); return deleted; } diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 3004c58b9021..7b0bc1fa5919 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -149,7 +149,7 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, int i; int j; - slot = indirect_to_ptr(slot); + slot = entry_to_node(slot); /* Verify consistency at this level */ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) { @@ -227,7 +227,7 @@ void tree_verify_min_height(struct radix_tree_root *root, int maxindex) return; } - node = indirect_to_ptr(node); + node = entry_to_node(node); assert(maxindex <= node_maxindex(node)); shift = node->shift; diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 866c8c676aa4..e85131369723 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -39,7 +39,6 @@ void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag); extern int nr_allocated; /* Normally private parts of lib/radix-tree.c */ -void *indirect_to_ptr(void *ptr); void radix_tree_dump(struct radix_tree_root *root); int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long node_maxindex(struct radix_tree_node *); -- cgit From b194d16c27af905d6e3552f4851bc7d9fee4e90f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:30 -0700 Subject: radix-tree: rename radix_tree_is_indirect_ptr() As with indirect_to_ptr(), ptr_to_indirect() and RADIX_TREE_INDIRECT_PTR, change radix_tree_is_indirect_ptr() to radix_tree_is_internal_node(). 
Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 10 ++++----- lib/radix-tree.c | 48 ++++++++++++++++++++--------------------- tools/testing/radix-tree/test.c | 4 ++-- 3 files changed, 31 insertions(+), 31 deletions(-) (limited to 'tools/testing') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index b94aa198dd6b..bad63105e37e 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -57,7 +57,7 @@ #define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \ RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE))) -static inline int radix_tree_is_indirect_ptr(void *ptr) +static inline int radix_tree_is_internal_node(void *ptr) { return (int)((unsigned long)ptr & RADIX_TREE_INTERNAL_NODE); } @@ -224,7 +224,7 @@ static inline void *radix_tree_deref_slot_protected(void **pslot, */ static inline int radix_tree_deref_retry(void *arg) { - return unlikely(radix_tree_is_indirect_ptr(arg)); + return unlikely(radix_tree_is_internal_node(arg)); } /** @@ -259,7 +259,7 @@ static inline int radix_tree_exception(void *arg) */ static inline void radix_tree_replace_slot(void **pslot, void *item) { - BUG_ON(radix_tree_is_indirect_ptr(item)); + BUG_ON(radix_tree_is_internal_node(item)); rcu_assign_pointer(*pslot, item); } @@ -468,7 +468,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) if (unlikely(!iter->tags)) return NULL; while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && - radix_tree_is_indirect_ptr(slot[1])) { + radix_tree_is_internal_node(slot[1])) { if (entry_to_node(slot[1]) == canon) { iter->tags >>= 1; iter->index = __radix_tree_iter_add(iter, 1); @@ -498,7 +498,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) iter->index = __radix_tree_iter_add(iter, 1); if 
(IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && - radix_tree_is_indirect_ptr(*slot)) { + radix_tree_is_internal_node(*slot)) { if (entry_to_node(*slot) == canon) continue; iter->next_index = iter->index; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3c3fdd9c5bb3..b65c83036ca4 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -100,7 +100,7 @@ static unsigned radix_tree_descend(struct radix_tree_node *parent, void **entry = rcu_dereference_raw(parent->slots[offset]); #ifdef CONFIG_RADIX_TREE_MULTIORDER - if (radix_tree_is_indirect_ptr(entry)) { + if (radix_tree_is_internal_node(entry)) { unsigned long siboff = get_slot_offset(parent, entry); if (siboff < RADIX_TREE_MAP_SIZE) { offset = siboff; @@ -232,7 +232,7 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) entry, i, *(void **)entry_to_node(entry), first, last); - } else if (!radix_tree_is_indirect_ptr(entry)) { + } else if (!radix_tree_is_internal_node(entry)) { pr_debug("radix entry %p offset %ld indices %ld-%ld\n", entry, i, first, last); } else { @@ -247,7 +247,7 @@ static void radix_tree_dump(struct radix_tree_root *root) pr_debug("radix root: %p rnode %p tags %x\n", root, root->rnode, root->gfp_mask >> __GFP_BITS_SHIFT); - if (!radix_tree_is_indirect_ptr(root->rnode)) + if (!radix_tree_is_internal_node(root->rnode)) return; dump_node(entry_to_node(root->rnode), 0); } @@ -302,7 +302,7 @@ radix_tree_node_alloc(struct radix_tree_root *root) ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask | __GFP_ACCOUNT); out: - BUG_ON(radix_tree_is_indirect_ptr(ret)); + BUG_ON(radix_tree_is_internal_node(ret)); return ret; } @@ -421,7 +421,7 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root, *nodep = node; - if (likely(radix_tree_is_indirect_ptr(node))) { + if (likely(radix_tree_is_internal_node(node))) { node = entry_to_node(node); *maxindex = node_maxindex(node); return node->shift + RADIX_TREE_MAP_SHIFT; @@ -467,7 +467,7 @@ static int radix_tree_extend(struct 
radix_tree_root *root, node->offset = 0; node->count = 1; node->parent = NULL; - if (radix_tree_is_indirect_ptr(slot)) + if (radix_tree_is_internal_node(slot)) entry_to_node(slot)->parent = node; node->slots[0] = slot; slot = node_to_entry(node); @@ -535,7 +535,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } else rcu_assign_pointer(root->rnode, node_to_entry(slot)); - } else if (!radix_tree_is_indirect_ptr(slot)) + } else if (!radix_tree_is_internal_node(slot)) break; /* Go a level down */ @@ -585,7 +585,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, void **slot; int error; - BUG_ON(radix_tree_is_indirect_ptr(item)); + BUG_ON(radix_tree_is_internal_node(item)); error = __radix_tree_create(root, index, order, &node, &slot); if (error) @@ -637,7 +637,7 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, if (index > maxindex) return NULL; - while (radix_tree_is_indirect_ptr(node)) { + while (radix_tree_is_internal_node(node)) { unsigned offset; if (node == RADIX_TREE_RETRY) @@ -720,7 +720,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root, shift = radix_tree_load_root(root, &node, &maxindex); BUG_ON(index > maxindex); - while (radix_tree_is_indirect_ptr(node)) { + while (radix_tree_is_internal_node(node)) { unsigned offset; shift -= RADIX_TREE_MAP_SHIFT; @@ -770,7 +770,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, parent = NULL; - while (radix_tree_is_indirect_ptr(node)) { + while (radix_tree_is_internal_node(node)) { shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; @@ -835,7 +835,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, if (node == NULL) return 0; - while (radix_tree_is_indirect_ptr(node)) { + while (radix_tree_is_internal_node(node)) { int offset; shift -= RADIX_TREE_MAP_SHIFT; @@ -900,7 +900,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if (index > maxindex) return NULL; - if 
(radix_tree_is_indirect_ptr(rnode)) { + if (radix_tree_is_internal_node(rnode)) { rnode = entry_to_node(rnode); } else if (rnode) { /* Single-slot tree */ @@ -957,7 +957,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if ((slot == NULL) || (slot == RADIX_TREE_RETRY)) goto restart; - if (!radix_tree_is_indirect_ptr(slot)) + if (!radix_tree_is_internal_node(slot)) break; node = entry_to_node(slot); @@ -1039,7 +1039,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, *first_indexp = last_index + 1; return 0; } - if (!radix_tree_is_indirect_ptr(slot)) { + if (!radix_tree_is_internal_node(slot)) { *first_indexp = last_index + 1; root_tag_set(root, settag); return 1; @@ -1059,7 +1059,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, if (!tag_get(node, iftag, offset)) goto next; /* Sibling slots never have tags set on them */ - if (radix_tree_is_indirect_ptr(slot)) { + if (radix_tree_is_internal_node(slot)) { node = entry_to_node(slot); shift -= RADIX_TREE_MAP_SHIFT; continue; @@ -1152,7 +1152,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; - if (radix_tree_is_indirect_ptr(results[ret])) { + if (radix_tree_is_internal_node(results[ret])) { slot = radix_tree_iter_retry(&iter); continue; } @@ -1235,7 +1235,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; - if (radix_tree_is_indirect_ptr(results[ret])) { + if (radix_tree_is_internal_node(results[ret])) { slot = radix_tree_iter_retry(&iter); continue; } @@ -1311,7 +1311,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, rcu_dereference_raw(slot->slots[i]); if (node == RADIX_TREE_RETRY) goto out; - if (!radix_tree_is_indirect_ptr(node)) { + if (!radix_tree_is_internal_node(node)) { if (node == item) { info->found_index = index; 
info->stop = true; @@ -1357,7 +1357,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) do { rcu_read_lock(); node = rcu_dereference_raw(root->rnode); - if (!radix_tree_is_indirect_ptr(node)) { + if (!radix_tree_is_internal_node(node)) { rcu_read_unlock(); if (node == item) info.found_index = 0; @@ -1398,7 +1398,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) struct radix_tree_node *to_free = root->rnode; struct radix_tree_node *slot; - if (!radix_tree_is_indirect_ptr(to_free)) + if (!radix_tree_is_internal_node(to_free)) break; to_free = entry_to_node(to_free); @@ -1412,10 +1412,10 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) slot = to_free->slots[0]; if (!slot) break; - if (!radix_tree_is_indirect_ptr(slot) && to_free->shift) + if (!radix_tree_is_internal_node(slot) && to_free->shift) break; - if (radix_tree_is_indirect_ptr(slot)) + if (radix_tree_is_internal_node(slot)) entry_to_node(slot)->parent = NULL; /* @@ -1445,7 +1445,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) * also results in a stale slot). So tag the slot as indirect * to force callers to retry. 
*/ - if (!radix_tree_is_indirect_ptr(slot)) + if (!radix_tree_is_internal_node(slot)) to_free->slots[0] = RADIX_TREE_RETRY; radix_tree_node_free(to_free); diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 7b0bc1fa5919..a6e8099eaf4f 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -193,7 +193,7 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag) { struct radix_tree_node *node = root->rnode; - if (!radix_tree_is_indirect_ptr(node)) + if (!radix_tree_is_internal_node(node)) return; verify_node(node, tag, !!root_tag_get(root, tag)); } @@ -222,7 +222,7 @@ void tree_verify_min_height(struct radix_tree_root *root, int maxindex) { unsigned shift; struct radix_tree_node *node = root->rnode; - if (!radix_tree_is_indirect_ptr(node)) { + if (!radix_tree_is_internal_node(node)) { assert(maxindex == 0); return; } -- cgit From 8c1244de00ef98f73e21eecc42d84b2742fbb4f9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:36 -0700 Subject: radix-tree: tidy up next_chunk Convert radix_tree_next_chunk to use 'child' instead of 'slot' as the name of the child node. Also use node_maxindex() where it makes sense. The 'rnode' variable was unnecessary; it doesn't overlap in usage with 'node', so we can just use 'node' the whole way through the function. Improve the testcase to start the walk from every index in the carefully constructed tree, and to accept any index within the range covered by the entry. 
Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 53 +++++++------------ tools/testing/radix-tree/multiorder.c | 99 +++++++++++++++++++---------------- 2 files changed, 74 insertions(+), 78 deletions(-) (limited to 'tools/testing') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 4b4a2a20a3d1..c42867a1769a 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -876,7 +876,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned flags) { unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK; - struct radix_tree_node *rnode, *node; + struct radix_tree_node *node, *child; unsigned long index, offset, maxindex; if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) @@ -896,38 +896,29 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, return NULL; restart: - shift = radix_tree_load_root(root, &rnode, &maxindex); + shift = radix_tree_load_root(root, &child, &maxindex); if (index > maxindex) return NULL; + if (!child) + return NULL; - if (radix_tree_is_internal_node(rnode)) { - rnode = entry_to_node(rnode); - } else if (rnode) { + if (!radix_tree_is_internal_node(child)) { /* Single-slot tree */ iter->index = index; iter->next_index = maxindex + 1; iter->tags = 1; - __set_iter_shift(iter, shift); + __set_iter_shift(iter, 0); return (void **)&root->rnode; - } else - return NULL; - - shift -= RADIX_TREE_MAP_SHIFT; - offset = index >> shift; - - node = rnode; - while (1) { - struct radix_tree_node *slot; - unsigned new_off = radix_tree_descend(node, &slot, offset); + } - if (new_off < offset) { - offset = new_off; - index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); - index |= offset << shift; - } + do { + node = entry_to_node(child); + shift -= RADIX_TREE_MAP_SHIFT; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + offset = radix_tree_descend(node, 
&child, offset); if ((flags & RADIX_TREE_ITER_TAGGED) ? - !tag_get(node, tag, offset) : !slot) { + !tag_get(node, tag, offset) : !child) { /* Hole detected */ if (flags & RADIX_TREE_ITER_CONTIG) return NULL; @@ -945,29 +936,23 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if (slot) break; } - index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); + index &= ~node_maxindex(node); index += offset << shift; /* Overflow after ~0UL */ if (!index) return NULL; if (offset == RADIX_TREE_MAP_SIZE) goto restart; - slot = rcu_dereference_raw(node->slots[offset]); + child = rcu_dereference_raw(node->slots[offset]); } - if ((slot == NULL) || (slot == RADIX_TREE_RETRY)) + if ((child == NULL) || (child == RADIX_TREE_RETRY)) goto restart; - if (!radix_tree_is_internal_node(slot)) - break; - - node = entry_to_node(slot); - shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - } + } while (radix_tree_is_internal_node(child)); /* Update the iterator state */ - iter->index = index & ~((1 << shift) - 1); - iter->next_index = (index | ((RADIX_TREE_MAP_SIZE << shift) - 1)) + 1; + iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); + iter->next_index = (index | node_maxindex(node)) + 1; __set_iter_shift(iter, shift); /* Construct iter->tags bit-mask from node->tags[tag] array */ diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index c061f4bd6c05..39d9b9568fe2 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -202,7 +202,7 @@ void multiorder_iteration(void) RADIX_TREE(tree, GFP_KERNEL); struct radix_tree_iter iter; void **slot; - int i, err; + int i, j, err; printf("Multiorder iteration test\n"); @@ -215,29 +215,21 @@ void multiorder_iteration(void) assert(!err); } - i = 0; - /* start from index 1 to verify we find the multi-order entry at 0 */ - radix_tree_for_each_slot(slot, &tree, &iter, 1) { - int height = order[i] / RADIX_TREE_MAP_SHIFT; 
- int shift = height * RADIX_TREE_MAP_SHIFT; - - assert(iter.index == index[i]); - assert(iter.shift == shift); - i++; - } - - /* - * Now iterate through the tree starting at an elevated multi-order - * entry, beginning at an index in the middle of the range. - */ - i = 8; - radix_tree_for_each_slot(slot, &tree, &iter, 70) { - int height = order[i] / RADIX_TREE_MAP_SHIFT; - int shift = height * RADIX_TREE_MAP_SHIFT; - - assert(iter.index == index[i]); - assert(iter.shift == shift); - i++; + for (j = 0; j < 256; j++) { + for (i = 0; i < NUM_ENTRIES; i++) + if (j <= (index[i] | ((1 << order[i]) - 1))) + break; + + radix_tree_for_each_slot(slot, &tree, &iter, j) { + int height = order[i] / RADIX_TREE_MAP_SHIFT; + int shift = height * RADIX_TREE_MAP_SHIFT; + int mask = (1 << order[i]) - 1; + + assert(iter.index >= (index[i] &~ mask)); + assert(iter.index <= (index[i] | mask)); + assert(iter.shift == shift); + i++; + } } item_kill_tree(&tree); @@ -249,7 +241,7 @@ void multiorder_tagged_iteration(void) struct radix_tree_iter iter; void **slot; unsigned long first = 0; - int i; + int i, j; printf("Multiorder tagged iteration test\n"); @@ -268,30 +260,49 @@ void multiorder_tagged_iteration(void) for (i = 0; i < TAG_ENTRIES; i++) assert(radix_tree_tag_set(&tree, tag_index[i], 1)); - i = 0; - /* start from index 1 to verify we find the multi-order entry at 0 */ - radix_tree_for_each_tagged(slot, &tree, &iter, 1, 1) { - assert(iter.index == tag_index[i]); - i++; - } - - /* - * Now iterate through the tree starting at an elevated multi-order - * entry, beginning at an index in the middle of the range. 
- */ - i = 4; - radix_tree_for_each_slot(slot, &tree, &iter, 70) { - assert(iter.index == tag_index[i]); - i++; + for (j = 0; j < 256; j++) { + int mask, k; + + for (i = 0; i < TAG_ENTRIES; i++) { + for (k = i; index[k] < tag_index[i]; k++) + ; + if (j <= (index[k] | ((1 << order[k]) - 1))) + break; + } + + radix_tree_for_each_tagged(slot, &tree, &iter, j, 1) { + for (k = i; index[k] < tag_index[i]; k++) + ; + mask = (1 << order[k]) - 1; + + assert(iter.index >= (tag_index[i] &~ mask)); + assert(iter.index <= (tag_index[i] | mask)); + i++; + } } radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, MT_NUM_ENTRIES, 1, 2); - i = 0; - radix_tree_for_each_tagged(slot, &tree, &iter, 1, 2) { - assert(iter.index == tag_index[i]); - i++; + for (j = 0; j < 256; j++) { + int mask, k; + + for (i = 0; i < TAG_ENTRIES; i++) { + for (k = i; index[k] < tag_index[i]; k++) + ; + if (j <= (index[k] | ((1 << order[k]) - 1))) + break; + } + + radix_tree_for_each_tagged(slot, &tree, &iter, j, 2) { + for (k = i; index[k] < tag_index[i]; k++) + ; + mask = (1 << order[k]) - 1; + + assert(iter.index >= (tag_index[i] &~ mask)); + assert(iter.index <= (tag_index[i] | mask)); + i++; + } } first = 1; -- cgit From ab68f26221366f92611650e8470e6a926801c7d4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2016 09:15:08 -0700 Subject: /dev/dax, pmem: direct access to persistent memory Device DAX is the device-centric analogue of Filesystem DAX (CONFIG_FS_DAX). It allows memory ranges to be allocated and mapped without need of an intervening file system. Device DAX is strict, precise and predictable. Specifically this interface: 1/ Guarantees fault granularity with respect to a given page size (pte, pmd, or pud) set at configuration time. 2/ Enforces deterministic behavior by being strict about what fault scenarios are supported. 
For example, by forcing MADV_DONTFORK semantics and omitting MAP_PRIVATE support device-dax guarantees that a mapping always behaves/performs the same once established. It is the "what you see is what you get" access mechanism to differentiated memory vs filesystem DAX which has filesystem specific implementation semantics. Persistent memory is the first target, but the mechanism is also targeted for exclusive allocations of performance differentiated memory ranges. This commit is limited to the base device driver infrastructure to associate a dax device with pmem range. Cc: Jeff Moyer Cc: Christoph Hellwig Cc: Andrew Morton Cc: Dave Hansen Cc: Ross Zwisler Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/dax/Kconfig | 25 ++++ drivers/dax/Makefile | 4 + drivers/dax/dax.c | 253 ++++++++++++++++++++++++++++++++++++ drivers/dax/dax.h | 24 ++++ drivers/dax/pmem.c | 158 ++++++++++++++++++++++ tools/testing/nvdimm/Kbuild | 9 ++ tools/testing/nvdimm/config_check.c | 2 + 9 files changed, 478 insertions(+) create mode 100644 drivers/dax/Kconfig create mode 100644 drivers/dax/Makefile create mode 100644 drivers/dax/dax.c create mode 100644 drivers/dax/dax.h create mode 100644 drivers/dax/pmem.c (limited to 'tools/testing') diff --git a/drivers/Kconfig b/drivers/Kconfig index d2ac339de85f..8298eab84a6f 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -190,6 +190,8 @@ source "drivers/android/Kconfig" source "drivers/nvdimm/Kconfig" +source "drivers/dax/Kconfig" + source "drivers/nvmem/Kconfig" source "drivers/hwtracing/stm/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 8f5d076baeb0..0b6f3d60193d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_PARPORT) += parport/ obj-$(CONFIG_NVM) += lightnvm/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ +obj-$(CONFIG_DEV_DAX) += dax/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ 
obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig new file mode 100644 index 000000000000..86ffbaa891ad --- /dev/null +++ b/drivers/dax/Kconfig @@ -0,0 +1,25 @@ +menuconfig DEV_DAX + tristate "DAX: direct access to differentiated memory" + default m if NVDIMM_DAX + help + Support raw access to differentiated (persistence, bandwidth, + latency...) memory via an mmap(2) capable character + device. Platform firmware or a device driver may identify a + platform memory resource that is differentiated from the + baseline memory pool. Mappings of a /dev/daxX.Y device impose + restrictions that make the mapping behavior deterministic. + +if DEV_DAX + +config DEV_DAX_PMEM + tristate "PMEM DAX: direct access to persistent memory" + depends on NVDIMM_DAX + default DEV_DAX + help + Support raw access to persistent memory. Note that this + driver consumes memory ranges allocated and exported by the + libnvdimm sub-system. + + Say Y if unsure + +endif diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile new file mode 100644 index 000000000000..27c54e38478a --- /dev/null +++ b/drivers/dax/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_DEV_DAX) += dax.o +obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o + +dax_pmem-y := pmem.o diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c new file mode 100644 index 000000000000..4c22a40f2335 --- /dev/null +++ b/drivers/dax/dax.c @@ -0,0 +1,253 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/pagemap.h> +#include <linux/module.h> +#include <linux/device.h> +#include <linux/pfn_t.h> +#include <linux/slab.h> +#include <linux/dax.h> +#include <linux/fs.h> +#include <linux/mm.h> + +static int dax_major; +static struct class *dax_class; +static DEFINE_IDA(dax_minor_ida); + +/** + * struct dax_region - mapping infrastructure for dax devices + * @id: kernel-wide unique region for a memory range + * @base: linear address corresponding to @res + * @kref: to pin while other agents have a need to do lookups + * @dev: parent device backing this region + * @align: allocation and mapping alignment for child dax devices + * @res: physical address range of the region + * @pfn_flags: identify whether the pfns are paged back or not + */ +struct dax_region { + int id; + struct ida ida; + void *base; + struct kref kref; + struct device *dev; + unsigned int align; + struct resource res; + unsigned long pfn_flags; +}; + +/** + * struct dax_dev - subdivision of a dax region + * @region - parent region + * @dev - device backing the character device + * @kref - enable this data to be tracked in filp->private_data + * @id - child id in the region + * @num_resources - number of physical address extents in this device + * @res - array of physical address ranges + */ +struct dax_dev { + struct dax_region *region; + struct device *dev; + struct kref kref; + int id; + int num_resources; + struct resource res[0]; +}; + +static void dax_region_free(struct kref *kref) +{ + struct dax_region *dax_region; + + dax_region = container_of(kref, struct dax_region, kref); + kfree(dax_region); +} + +void dax_region_put(struct dax_region *dax_region) +{ + kref_put(&dax_region->kref, dax_region_free); +} +EXPORT_SYMBOL_GPL(dax_region_put); + +static void dax_dev_free(struct kref *kref) +{ + struct dax_dev *dax_dev; + + dax_dev = container_of(kref, struct dax_dev, kref); + dax_region_put(dax_dev->region); + kfree(dax_dev); +} + +static void dax_dev_put(struct dax_dev *dax_dev) +{ + kref_put(&dax_dev->kref, dax_dev_free); +} + +struct dax_region *alloc_dax_region(struct device *parent, int
region_id, + struct resource *res, unsigned int align, void *addr, + unsigned long pfn_flags) +{ + struct dax_region *dax_region; + + dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL); + + if (!dax_region) + return NULL; + + memcpy(&dax_region->res, res, sizeof(*res)); + dax_region->pfn_flags = pfn_flags; + kref_init(&dax_region->kref); + dax_region->id = region_id; + ida_init(&dax_region->ida); + dax_region->align = align; + dax_region->dev = parent; + dax_region->base = addr; + + return dax_region; +} +EXPORT_SYMBOL_GPL(alloc_dax_region); + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dax_dev *dax_dev = dev_get_drvdata(dev); + unsigned long long size = 0; + int i; + + for (i = 0; i < dax_dev->num_resources; i++) + size += resource_size(&dax_dev->res[i]); + + return sprintf(buf, "%llu\n", size); +} +static DEVICE_ATTR_RO(size); + +static struct attribute *dax_device_attributes[] = { + &dev_attr_size.attr, + NULL, +}; + +static const struct attribute_group dax_device_attribute_group = { + .attrs = dax_device_attributes, +}; + +static const struct attribute_group *dax_attribute_groups[] = { + &dax_device_attribute_group, + NULL, +}; + +static void unregister_dax_dev(void *_dev) +{ + struct device *dev = _dev; + struct dax_dev *dax_dev = dev_get_drvdata(dev); + struct dax_region *dax_region = dax_dev->region; + + dev_dbg(dev, "%s\n", __func__); + + get_device(dev); + device_unregister(dev); + ida_simple_remove(&dax_region->ida, dax_dev->id); + ida_simple_remove(&dax_minor_ida, MINOR(dev->devt)); + put_device(dev); + dax_dev_put(dax_dev); +} + +int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res, + int count) +{ + struct device *parent = dax_region->dev; + struct dax_dev *dax_dev; + struct device *dev; + int rc, minor; + dev_t dev_t; + + dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL); + if (!dax_dev) + return -ENOMEM; + memcpy(dax_dev->res, res, sizeof(*res) * 
count); + dax_dev->num_resources = count; + kref_init(&dax_dev->kref); + dax_dev->region = dax_region; + kref_get(&dax_region->kref); + + dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL); + if (dax_dev->id < 0) { + rc = dax_dev->id; + goto err_id; + } + + minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL); + if (minor < 0) { + rc = minor; + goto err_minor; + } + + dev_t = MKDEV(dax_major, minor); + dev = device_create_with_groups(dax_class, parent, dev_t, dax_dev, + dax_attribute_groups, "dax%d.%d", dax_region->id, + dax_dev->id); + if (IS_ERR(dev)) { + rc = PTR_ERR(dev); + goto err_create; + } + dax_dev->dev = dev; + + rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev); + if (rc) { + unregister_dax_dev(dev); + return rc; + } + + return 0; + + err_create: + ida_simple_remove(&dax_minor_ida, minor); + err_minor: + ida_simple_remove(&dax_region->ida, dax_dev->id); + err_id: + dax_dev_put(dax_dev); + + return rc; +} +EXPORT_SYMBOL_GPL(devm_create_dax_dev); + +static const struct file_operations dax_fops = { + .llseek = noop_llseek, + .owner = THIS_MODULE, +}; + +static int __init dax_init(void) +{ + int rc; + + rc = register_chrdev(0, "dax", &dax_fops); + if (rc < 0) + return rc; + dax_major = rc; + + dax_class = class_create(THIS_MODULE, "dax"); + if (IS_ERR(dax_class)) { + unregister_chrdev(dax_major, "dax"); + return PTR_ERR(dax_class); + } + + return 0; +} + +static void __exit dax_exit(void) +{ + class_destroy(dax_class); + unregister_chrdev(dax_major, "dax"); + ida_destroy(&dax_minor_ida); +} + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL v2"); +subsys_initcall(dax_init); +module_exit(dax_exit); diff --git a/drivers/dax/dax.h b/drivers/dax/dax.h new file mode 100644 index 000000000000..d8b8f1f25054 --- /dev/null +++ b/drivers/dax/dax.h @@ -0,0 +1,24 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __DAX_H__ +#define __DAX_H__ +struct device; +struct resource; +struct dax_region; +void dax_region_put(struct dax_region *dax_region); +struct dax_region *alloc_dax_region(struct device *parent, + int region_id, struct resource *res, unsigned int align, + void *addr, unsigned long flags); +int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res, + int count); +#endif /* __DAX_H__ */ diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c new file mode 100644 index 000000000000..55d510e36cd1 --- /dev/null +++ b/drivers/dax/pmem.c @@ -0,0 +1,158 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include +#include +#include +#include +#include "../nvdimm/pfn.h" +#include "../nvdimm/nd.h" +#include "dax.h" + +struct dax_pmem { + struct device *dev; + struct percpu_ref ref; + struct completion cmp; +}; + +struct dax_pmem *to_dax_pmem(struct percpu_ref *ref) +{ + return container_of(ref, struct dax_pmem, ref); +} + +static void dax_pmem_percpu_release(struct percpu_ref *ref) +{ + struct dax_pmem *dax_pmem = to_dax_pmem(ref); + + dev_dbg(dax_pmem->dev, "%s\n", __func__); + complete(&dax_pmem->cmp); +} + +static void dax_pmem_percpu_exit(void *data) +{ + struct percpu_ref *ref = data; + struct dax_pmem *dax_pmem = to_dax_pmem(ref); + + dev_dbg(dax_pmem->dev, "%s\n", __func__); + percpu_ref_exit(ref); + wait_for_completion(&dax_pmem->cmp); +} + +static void dax_pmem_percpu_kill(void *data) +{ + struct percpu_ref *ref = data; + struct dax_pmem *dax_pmem = to_dax_pmem(ref); + + dev_dbg(dax_pmem->dev, "%s\n", __func__); + percpu_ref_kill(ref); +} + +static int dax_pmem_probe(struct device *dev) +{ + int rc; + void *addr; + struct resource res; + struct nd_pfn_sb *pfn_sb; + struct dax_pmem *dax_pmem; + struct nd_region *nd_region; + struct nd_namespace_io *nsio; + struct dax_region *dax_region; + struct nd_namespace_common *ndns; + struct nd_dax *nd_dax = to_nd_dax(dev); + struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; + struct vmem_altmap __altmap, *altmap = NULL; + + ndns = nvdimm_namespace_common_probe(dev); + if (IS_ERR(ndns)) + return PTR_ERR(ndns); + nsio = to_nd_namespace_io(&ndns->dev); + + /* parse the 'pfn' info block via ->rw_bytes */ + devm_nsio_enable(dev, nsio); + altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap); + if (IS_ERR(altmap)) + return PTR_ERR(altmap); + devm_nsio_disable(dev, nsio); + + pfn_sb = nd_pfn->pfn_sb; + + if (!devm_request_mem_region(dev, nsio->res.start, + resource_size(&nsio->res), dev_name(dev))) { + dev_warn(dev, "could not reserve region %pR\n", &nsio->res); + return -EBUSY; + } + + dax_pmem = devm_kzalloc(dev, 
sizeof(*dax_pmem), GFP_KERNEL); + if (!dax_pmem) + return -ENOMEM; + + dax_pmem->dev = dev; + init_completion(&dax_pmem->cmp); + rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0, + GFP_KERNEL); + if (rc) + return rc; + + rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref); + if (rc) { + dax_pmem_percpu_exit(&dax_pmem->ref); + return rc; + } + + addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap); + if (IS_ERR(addr)) + return PTR_ERR(addr); + + rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref); + if (rc) { + dax_pmem_percpu_kill(&dax_pmem->ref); + return rc; + } + + nd_region = to_nd_region(dev->parent); + dax_region = alloc_dax_region(dev, nd_region->id, &res, + le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP); + if (!dax_region) + return -ENOMEM; + + /* TODO: support for subdividing a dax region... */ + rc = devm_create_dax_dev(dax_region, &res, 1); + + /* child dax_dev instances now own the lifetime of the dax_region */ + dax_region_put(dax_region); + + return rc; +} + +static struct nd_device_driver dax_pmem_driver = { + .probe = dax_pmem_probe, + .drv = { + .name = "dax_pmem", + }, + .type = ND_DRIVER_DAX_PMEM, +}; + +static int __init dax_pmem_init(void) +{ + return nd_driver_register(&dax_pmem_driver); +} +module_init(dax_pmem_init); + +static void __exit dax_pmem_exit(void) +{ + driver_unregister(&dax_pmem_driver.drv); +} +module_exit(dax_pmem_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM); diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 5ff6d3c126a9..785985677159 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -16,6 +16,7 @@ ldflags-y += --wrap=phys_to_pfn_t DRIVERS := ../../../drivers NVDIMM_SRC := $(DRIVERS)/nvdimm ACPI_SRC := $(DRIVERS)/acpi +DAX_SRC := $(DRIVERS)/dax obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o @@ -23,6 +24,8 @@ 
obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o obj-$(CONFIG_ACPI_NFIT) += nfit.o +obj-$(CONFIG_DEV_DAX) += dax.o +obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o nfit-y := $(ACPI_SRC)/nfit.o nfit-y += config_check.o @@ -39,6 +42,12 @@ nd_blk-y += config_check.o nd_e820-y := $(NVDIMM_SRC)/e820.o nd_e820-y += config_check.o +dax-y := $(DAX_SRC)/dax.o +dax-y += config_check.o + +dax_pmem-y := $(DAX_SRC)/pmem.o +dax_pmem-y += config_check.o + libnvdimm-y := $(NVDIMM_SRC)/core.o libnvdimm-y += $(NVDIMM_SRC)/bus.o libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c index f2c7615554eb..adf18bfeca00 100644 --- a/tools/testing/nvdimm/config_check.c +++ b/tools/testing/nvdimm/config_check.c @@ -12,4 +12,6 @@ void check(void) BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM)); } -- cgit From 64e2a42bca12e408f0258c56adcf3595bcd116e7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 1 Apr 2016 22:40:53 +0200 Subject: parisc: Add ARCH_TRACEHOOK and regset support By adding TRACEHOOK support we now get a clean user interface to access registers via PTRACE_GETREGS, PTRACE_SETREGS, PTRACE_GETFPREGS and PTRACE_SETFPREGS. The user-visible regset struct user_regs_struct and user_fp_struct are modelled similarly to x86 and can be accessed via PTRACE_GETREGSET.
Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 1 + arch/parisc/include/uapi/asm/ptrace.h | 48 ++++ arch/parisc/kernel/ptrace.c | 356 +++++++++++++++++++++++++- tools/testing/selftests/seccomp/seccomp_bpf.c | 8 +- 4 files changed, 410 insertions(+), 3 deletions(-) (limited to 'tools/testing') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 9589511e9c95..6c68c23dd7c2 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -32,6 +32,7 @@ config PARISC select HAVE_DEBUG_STACKOVERFLOW select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK select ARCH_NO_COHERENT_DMA_MMAP select CPU_NO_EFFICIENT_FFS diff --git a/arch/parisc/include/uapi/asm/ptrace.h b/arch/parisc/include/uapi/asm/ptrace.h index c4fa6c8b9ad9..02ce2eb99a7f 100644 --- a/arch/parisc/include/uapi/asm/ptrace.h +++ b/arch/parisc/include/uapi/asm/ptrace.h @@ -13,6 +13,11 @@ * N.B. gdb/strace care about the size and offsets within this * structure. If you change things, you may break object compatibility * for those applications. + * + * Please do NOT use this structure for future programs, but use + * user_regs_struct (see below) instead. + * + * It can be accessed through PTRACE_PEEKUSR/PTRACE_POKEUSR only. */ struct pt_regs { @@ -33,6 +38,45 @@ struct pt_regs { unsigned long ipsw; /* CR22 */ }; +/** + * struct user_regs_struct - User general purpose registers + * + * This is the user-visible general purpose register state structure + * which is used to define the elf_gregset_t. + * + * It can be accessed through PTRACE_GETREGSET with NT_PRSTATUS + * and through PTRACE_GETREGS. 
+ */ +struct user_regs_struct { + unsigned long gr[32]; /* PSW is in gr[0] */ + unsigned long sr[8]; + unsigned long iaoq[2]; + unsigned long iasq[2]; + unsigned long sar; /* CR11 */ + unsigned long iir; /* CR19 */ + unsigned long isr; /* CR20 */ + unsigned long ior; /* CR21 */ + unsigned long ipsw; /* CR22 */ + unsigned long cr0; + unsigned long cr24, cr25, cr26, cr27, cr28, cr29, cr30, cr31; + unsigned long cr8, cr9, cr12, cr13, cr10, cr15; + unsigned long _pad[80-64]; /* pad to ELF_NGREG (80) */ +}; + +/** + * struct user_fp_struct - User floating point registers + * + * This is the user-visible floating point register state structure. + * It uses the same layout and size as elf_fpregset_t. + * + * It can be accessed through PTRACE_GETREGSET with NT_PRFPREG + * and through PTRACE_GETFPREGS. + */ +struct user_fp_struct { + __u64 fr[32]; +}; + + /* * The numbers chosen here are somewhat arbitrary but absolutely MUST * not overlap with any of the number assigned in . @@ -43,5 +87,9 @@ struct pt_regs { */ #define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ +#define PTRACE_GETREGS 18 +#define PTRACE_SETREGS 19 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 #endif /* _UAPI_PARISC_PTRACE_H */ diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 4863761bdbbb..b5458b37fc5b 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -4,18 +4,20 @@ * Copyright (C) 2000 Hewlett-Packard Co, Linuxcare Inc. * Copyright (C) 2000 Matthew Wilcox * Copyright (C) 2000 David Huggins-Daines - * Copyright (C) 2008 Helge Deller + * Copyright (C) 2008-2016 Helge Deller */ #include #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -33,6 +35,14 @@ #define CREATE_TRACE_POINTS #include +/* + * These are our native regset flavors. + */ +enum parisc_regset { + REGSET_GENERAL, + REGSET_FP +}; + /* * Called by kernel/ptrace.c when detaching.. 
* @@ -117,6 +127,7 @@ void user_enable_block_step(struct task_struct *task) long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { + unsigned long __user *datap = (unsigned long __user *)data; unsigned long tmp; long ret = -EIO; @@ -129,7 +140,7 @@ long arch_ptrace(struct task_struct *child, long request, addr >= sizeof(struct pt_regs)) break; tmp = *(unsigned long *) ((char *) task_regs(child) + addr); - ret = put_user(tmp, (unsigned long __user *) data); + ret = put_user(tmp, datap); break; /* Write the word at location addr in the USER area. This will need @@ -168,6 +179,34 @@ long arch_ptrace(struct task_struct *child, long request, } break; + case PTRACE_GETREGS: /* Get all gp regs from the child. */ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct), + datap); + + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct), + datap); + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_FP, + 0, sizeof(struct user_fp_struct), + datap); + + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_FP, + 0, sizeof(struct user_fp_struct), + datap); + default: ret = ptrace_request(child, request, addr, data); break; @@ -326,3 +365,316 @@ void do_syscall_trace_exit(struct pt_regs *regs) if (stepping || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, stepping); } + + +/* + * regset functions. 
+ */ + +static int fpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_regs(target); + __u64 *k = kbuf; + __u64 __user *u = ubuf; + __u64 reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < ELF_NFPREG; --count) + *k++ = regs->fr[pos++]; + else + for (; count > 0 && pos < ELF_NFPREG; --count) + if (__put_user(regs->fr[pos++], u++)) + return -EFAULT; + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + ELF_NFPREG * sizeof(reg), -1); +} + +static int fpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_regs(target); + const __u64 *k = kbuf; + const __u64 __user *u = ubuf; + __u64 reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < ELF_NFPREG; --count) + regs->fr[pos++] = *k++; + else + for (; count > 0 && pos < ELF_NFPREG; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + regs->fr[pos++] = reg; + } + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + ELF_NFPREG * sizeof(reg), -1); +} + +#define RI(reg) (offsetof(struct user_regs_struct,reg) / sizeof(long)) + +static unsigned long get_reg(struct pt_regs *regs, int num) +{ + switch (num) { + case RI(gr[0]) ... RI(gr[31]): return regs->gr[num - RI(gr[0])]; + case RI(sr[0]) ... 
RI(sr[7]): return regs->sr[num - RI(sr[0])]; + case RI(iasq[0]): return regs->iasq[0]; + case RI(iasq[1]): return regs->iasq[1]; + case RI(iaoq[0]): return regs->iaoq[0]; + case RI(iaoq[1]): return regs->iaoq[1]; + case RI(sar): return regs->sar; + case RI(iir): return regs->iir; + case RI(isr): return regs->isr; + case RI(ior): return regs->ior; + case RI(ipsw): return regs->ipsw; + case RI(cr27): return regs->cr27; + case RI(cr0): return mfctl(0); + case RI(cr24): return mfctl(24); + case RI(cr25): return mfctl(25); + case RI(cr26): return mfctl(26); + case RI(cr28): return mfctl(28); + case RI(cr29): return mfctl(29); + case RI(cr30): return mfctl(30); + case RI(cr31): return mfctl(31); + case RI(cr8): return mfctl(8); + case RI(cr9): return mfctl(9); + case RI(cr12): return mfctl(12); + case RI(cr13): return mfctl(13); + case RI(cr10): return mfctl(10); + case RI(cr15): return mfctl(15); + default: return 0; + } +} + +static void set_reg(struct pt_regs *regs, int num, unsigned long val) +{ + switch (num) { + case RI(gr[0]): /* + * PSW is in gr[0]. + * Allow writing to Nullify, Divide-step-correction, + * and carry/borrow bits. + * BEWARE, if you set N, and then single step, it won't + * stop on the nullified instruction. + */ + val &= USER_PSW_BITS; + regs->gr[0] &= ~USER_PSW_BITS; + regs->gr[0] |= val; + return; + case RI(gr[1]) ... RI(gr[31]): + regs->gr[num - RI(gr[0])] = val; + return; + case RI(iaoq[0]): + case RI(iaoq[1]): + regs->iaoq[num - RI(iaoq[0])] = val; + return; + case RI(sar): regs->sar = val; + return; + default: return; +#if 0 + /* do not allow to change any of the following registers (yet) */ + case RI(sr[0]) ... 
RI(sr[7]): return regs->sr[num - RI(sr[0])]; + case RI(iasq[0]): return regs->iasq[0]; + case RI(iasq[1]): return regs->iasq[1]; + case RI(iir): return regs->iir; + case RI(isr): return regs->isr; + case RI(ior): return regs->ior; + case RI(ipsw): return regs->ipsw; + case RI(cr27): return regs->cr27; + case cr0, cr24, cr25, cr26, cr27, cr28, cr29, cr30, cr31; + case cr8, cr9, cr12, cr13, cr10, cr15; +#endif + } +} + +static int gpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_regs(target); + unsigned long *k = kbuf; + unsigned long __user *u = ubuf; + unsigned long reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < ELF_NGREG; --count) + *k++ = get_reg(regs, pos++); + else + for (; count > 0 && pos < ELF_NGREG; --count) + if (__put_user(get_reg(regs, pos++), u++)) + return -EFAULT; + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + ELF_NGREG * sizeof(reg), -1); +} + +static int gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_regs(target); + const unsigned long *k = kbuf; + const unsigned long __user *u = ubuf; + unsigned long reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < ELF_NGREG; --count) + set_reg(regs, pos++, *k++); + else + for (; count > 0 && pos < ELF_NGREG; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + set_reg(regs, pos++, reg); + } + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + ELF_NGREG * sizeof(reg), -1); +} + +static const struct user_regset native_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = 
NT_PRSTATUS, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .get = gpr_get, .set = gpr_set + }, + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + .size = sizeof(__u64), .align = sizeof(__u64), + .get = fpr_get, .set = fpr_set + } +}; + +static const struct user_regset_view user_parisc_native_view = { + .name = "parisc", .e_machine = ELF_ARCH, .ei_osabi = ELFOSABI_LINUX, + .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) +}; + +#ifdef CONFIG_64BIT +#include + +static int gpr32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_regs(target); + compat_ulong_t *k = kbuf; + compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < ELF_NGREG; --count) + *k++ = get_reg(regs, pos++); + else + for (; count > 0 && pos < ELF_NGREG; --count) + if (__put_user((compat_ulong_t) get_reg(regs, pos++), u++)) + return -EFAULT; + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + ELF_NGREG * sizeof(reg), -1); +} + +static int gpr32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_regs(target); + const compat_ulong_t *k = kbuf; + const compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < ELF_NGREG; --count) + set_reg(regs, pos++, *k++); + else + for (; count > 0 && pos < ELF_NGREG; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + set_reg(regs, pos++, reg); + } + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + ELF_NGREG * 
sizeof(reg), -1); +} + +/* + * These are the regset flavors matching the 32bit native set. + */ +static const struct user_regset compat_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), + .get = gpr32_get, .set = gpr32_set + }, + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + .size = sizeof(__u64), .align = sizeof(__u64), + .get = fpr_get, .set = fpr_set + } +}; + +static const struct user_regset_view user_parisc_compat_view = { + .name = "parisc", .e_machine = EM_PARISC, .ei_osabi = ELFOSABI_LINUX, + .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets) +}; +#endif /* CONFIG_64BIT */ + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + BUILD_BUG_ON(sizeof(struct user_regs_struct)/sizeof(long) != ELF_NGREG); + BUILD_BUG_ON(sizeof(struct user_fp_struct)/sizeof(__u64) != ELF_NFPREG); +#ifdef CONFIG_64BIT + if (is_compat_task()) + return &user_parisc_compat_view; +#endif + return &user_parisc_native_view; +} diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7947e568e057..2e58549b2f02 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1234,6 +1234,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS struct user_pt_regs # define SYSCALL_NUM regs[8] # define SYSCALL_RET regs[0] +#elif defined(__hppa__) +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM gr[20] +# define SYSCALL_RET gr[28] #elif defined(__powerpc__) # define ARCH_REGS struct pt_regs # define SYSCALL_NUM gpr[0] @@ -1303,7 +1307,7 @@ void change_syscall(struct __test_metadata *_metadata, EXPECT_EQ(0, ret); #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ - defined(__s390__) + defined(__s390__) || defined(__hppa__) { regs.SYSCALL_NUM = syscall; } @@ -1505,6 +1509,8 @@ 
TEST_F(TRACE_syscall, syscall_dropped) # define __NR_seccomp 383 # elif defined(__aarch64__) # define __NR_seccomp 277 +# elif defined(__hppa__) +# define __NR_seccomp 338 # elif defined(__powerpc__) # define __NR_seccomp 358 # elif defined(__s390__) -- cgit From a4351cb5511b917556c796d97068318a53a00849 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 31 Mar 2016 13:11:05 +0200 Subject: selftests/thuge-gen: Use platform specific MAP_HUGETLB value Do not hardcode MAP_HUGETLB to 0x40000, since quite some architectures use a different value. Tested with a parisc architecture 64bit kernel. Signed-off-by: Helge Deller --- tools/testing/selftests/vm/thuge-gen.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/vm/thuge-gen.c index c87957295f74..0bc737a75150 100644 --- a/tools/testing/selftests/vm/thuge-gen.c +++ b/tools/testing/selftests/vm/thuge-gen.c @@ -30,7 +30,9 @@ #define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) #define MAP_HUGE_SHIFT 26 #define MAP_HUGE_MASK 0x3f +#if !defined(MAP_HUGETLB) #define MAP_HUGETLB 0x40000 +#endif #define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ #define SHM_HUGE_SHIFT 26 -- cgit From 97f8827a8c7963756ae7d3ee898675b4667eca73 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 23 May 2016 10:04:46 -0400 Subject: ftracetest: Use proper logic to find process PID Half of the test in instance-event.tc was updated to use $! to find the PID of the previous background process that was launched, but the second part of the test still used the parsing of "jobs", which does not work on all shells like $! does. 
Signed-off-by: Steven Rostedt --- .../selftests/ftrace/test.d/instances/instance-event.tc | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc index 5f2abd03f16b..4c5a061a5b4e 100644 --- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc +++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc @@ -92,28 +92,23 @@ instance_slam() { } instance_slam & -x=`jobs -l` -p1=`echo $x | cut -d' ' -f2` +p1=$! echo $p1 instance_slam & -x=`jobs -l | tail -1` -p2=`echo $x | cut -d' ' -f2` +p2=$! echo $p2 instance_slam & -x=`jobs -l | tail -1` -p3=`echo $x | cut -d' ' -f2` +p3=$! echo $p3 instance_slam & -x=`jobs -l | tail -1` -p4=`echo $x | cut -d' ' -f2` +p4=$! echo $p4 instance_slam & -x=`jobs -l | tail -1` -p5=`echo $x | cut -d' ' -f2` +p5=$! echo $p5 ls -lR >/dev/null -- cgit From fc100a7f89da85da8edd9c2e6f6e8b2490d74ae1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 3 Jun 2016 19:19:20 +0200 Subject: soreuseport: Fix reuseport_bpf testcase on 32bit architectures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following compiler warnings when compiling the reuseport_bpf testcase on a 32 bit platform: reuseport_bpf.c: In function ‘attach_ebpf’: reuseport_bpf.c:114:15: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] Signed-off-by: Helge Deller Signed-off-by: David S.
Miller --- tools/testing/selftests/net/reuseport_bpf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index 96ba386b1b7b..4a8217448f20 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -111,9 +111,9 @@ static void attach_ebpf(int fd, uint16_t mod) memset(&attr, 0, sizeof(attr)); attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; attr.insn_cnt = ARRAY_SIZE(prog); - attr.insns = (uint64_t)prog; - attr.license = (uint64_t)bpf_license; - attr.log_buf = (uint64_t)bpf_log_buf; + attr.insns = (unsigned long) &prog; + attr.license = (unsigned long) &bpf_license; + attr.log_buf = (unsigned long) &bpf_log_buf; attr.log_size = sizeof(bpf_log_buf); attr.log_level = 1; attr.kern_version = 0; @@ -351,8 +351,8 @@ static void test_filter_no_reuseport(const struct test_params p) memset(&eprog, 0, sizeof(eprog)); eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; eprog.insn_cnt = ARRAY_SIZE(ecode); - eprog.insns = (uint64_t)ecode; - eprog.license = (uint64_t)bpf_license; + eprog.insns = (unsigned long) &ecode; + eprog.license = (unsigned long) &bpf_license; eprog.kern_version = 0; memset(&cprog, 0, sizeof(cprog)); -- cgit From 0ded5174e976e2b2a354fe38abf1ebf4492c6dc3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 23 May 2016 15:06:30 -0400 Subject: ftracetest: Fix hist unsupported result in hist selftests When histograms are not configured in the kernel, the ftracetest histogram selftests should return "unsupported" and not "Failed". To detect this, the test scripts have: FEATURE=`grep hist events/sched/sched_process_fork/trigger` if [ -z "$FEATURE" ]; then echo "hist trigger is not supported" exit_unsupported fi The problem is that '-e' is in effect and any error will cause the program to terminate. 
The grep for 'hist' fails, because it is not compiled in (thus unsupported), but because grep has an error code for failing to find the string, it causes the program to terminate, and is marked as a failed test. Namhyung Kim recommended to test for the "hist" file located in events/sched/sched_process_fork/hist instead, as it is more in line with the other checks. As the hist file is only created if the histogram feature is enabled, that is a valid check. Link: http://lkml.kernel.org/r/20160523151538.4ea9ce0c@gandalf.local.home Suggested-by: Namhyung Kim Acked-by: Namhyung Kim Acked-by: Masami Hiramatsu Fixes: 76929ab51f0ee ("kselftests/ftrace: Add hist trigger testcases") Signed-off-by: Steven Rostedt --- .../testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc | 9 ++++----- tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc | 9 ++++----- .../testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc | 9 ++++----- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc index c2b61c4fda11..0bf5085281f3 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - -FEATURE=`grep hist events/sched/sched_process_fork/trigger` -if [ -z "$FEATURE" ]; then +if [ !
-f events/sched/sched_process_fork/hist ]; then echo "hist trigger is not supported" exit_unsupported fi +reset_tracer +do_reset + echo "Test histogram with execname modifier" echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc index b2902d42a537..a00184cd9c95 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc @@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - -FEATURE=`grep hist events/sched/sched_process_fork/trigger` -if [ -z "$FEATURE" ]; then +if [ ! -f events/sched/sched_process_fork/hist ]; then echo "hist trigger is not supported" exit_unsupported fi +reset_tracer +do_reset + echo "Test histogram basic tigger" echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc index 03c4a46561fc..3478b00ead57 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc @@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - -FEATURE=`grep hist events/sched/sched_process_fork/trigger` -if [ -z "$FEATURE" ]; then +if [ ! 
-f events/sched/sched_process_fork/hist ]; then echo "hist trigger is not supported" exit_unsupported fi +reset_tracer +do_reset + reset_trigger echo "Test histogram multiple tiggers" -- cgit From a7b50abc90afb2e3c27e1bd212643cc53eaf0b60 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 24 Jun 2016 14:48:40 -0700 Subject: selftests/vm/compaction_test: fix write to restore nr_hugepages The write at the end of the test to restore nr_hugepages to its previous value is failing. This is because it is trying to write the number of bytes in the char array as opposed to the number of bytes in the string. Link: http://lkml.kernel.org/r/1465331205-3284-1-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Cc: Shuah Khan Cc: Sri Jayaramappa Cc: Eric B Munson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/compaction_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index 932ff577ffc0..00c4f65d12da 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -136,7 +136,7 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) printf("No of huge pages allocated = %d\n", (atoi(nr_hugepages))); - if (write(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) + if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) != strlen(initial_nr_hugepages)) { perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); goto close_fd; -- cgit