From 6ad515c6d6beea3cbb8557ba2b0f57c39301b0f7 Mon Sep 17 00:00:00 2001 From: Peter Meerwald-Stadler Date: Tue, 15 Mar 2016 22:54:51 +0100 Subject: tools: iio: Update iio_event_monitor names add recently added channel types and modifiers Signed-off-by: Peter Meerwald-Stadler Signed-off-by: Jonathan Cameron --- tools/iio/iio_event_monitor.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index d51eb04202e9..90980f586f7a 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -53,6 +53,9 @@ static const char * const iio_chan_type_name_spec[] = { [IIO_ENERGY] = "energy", [IIO_DISTANCE] = "distance", [IIO_VELOCITY] = "velocity", + [IIO_CONCENTRATION] = "concentration", + [IIO_RESISTANCE] = "resistance", + [IIO_PH] = "ph", }; static const char * const iio_ev_type_text[] = { @@ -102,6 +105,10 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_WALKING] = "walking", [IIO_MOD_STILL] = "still", [IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z] = "sqrt(x^2+y^2+z^2)", + [IIO_MOD_I] = "i", + [IIO_MOD_Q] = "q", + [IIO_MOD_CO2] = "co2", + [IIO_MOD_VOC] = "voc", }; static bool event_is_known(struct iio_event_data *event) @@ -136,6 +143,9 @@ static bool event_is_known(struct iio_event_data *event) case IIO_ENERGY: case IIO_DISTANCE: case IIO_VELOCITY: + case IIO_CONCENTRATION: + case IIO_RESISTANCE: + case IIO_PH: break; default: return false; @@ -174,6 +184,10 @@ static bool event_is_known(struct iio_event_data *event) case IIO_MOD_WALKING: case IIO_MOD_STILL: case IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z: + case IIO_MOD_I: + case IIO_MOD_Q: + case IIO_MOD_CO2: + case IIO_MOD_VOC: break; default: return false; -- cgit From f6a49e5a3f5562855f9e4b9b81916b06ef673771 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 25 Mar 2016 13:36:29 +0100 Subject: tools/gpio: Enable compiler optimization to catch more bugs Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- tools/gpio/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index 4d198d5c4203..c155d6bc47a7 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -1,5 +1,5 @@ CC = $(CROSS_COMPILE)gcc -CFLAGS += -Wall -g -D_GNU_SOURCE +CFLAGS += -O2 -Wall -g -D_GNU_SOURCE all: lsgpio -- cgit From 691998fac6f50c9117e279c3fbfa63a23cf7ce2e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 25 Mar 2016 13:36:30 +0100 Subject: tools/gpio: Add missing initialization of device_name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lsgpio.c: In function ‘main’: lsgpio.c:166:7: warning: ‘device_name’ may be used uninitialized in this functio n [-Wmaybe-uninitialized] ret = list_device(device_name); ^ Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- tools/gpio/lsgpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c index 1124da375942..eb3f56efd215 100644 --- a/tools/gpio/lsgpio.c +++ b/tools/gpio/lsgpio.c @@ -147,7 +147,7 @@ void print_usage(void) int main(int argc, char **argv) { - const char *device_name; + const char *device_name = NULL; int ret; int c; -- cgit From 793d6b5ea80c9bba8de40b720991b436fa5e174d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 24 Mar 2016 09:39:14 +0100 Subject: iio: tools: make generic_buffer look for "-trigger" All the ST Sensors use the old "-trigger" rather than the standard "-devN" new standard suffix for triggers. Now much to do about it since it is ABI, but make the testing tools recognize it too. Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- tools/iio/generic_buffer.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/iio/generic_buffer.c b/tools/iio/generic_buffer.c index 01c4f67801e0..c42b7f836b48 100644 --- a/tools/iio/generic_buffer.c +++ b/tools/iio/generic_buffer.c @@ -304,7 +304,19 @@ int main(int argc, char **argv) } } - /* Verify the trigger exists */ + /* Look for this "-devN" trigger */ + trig_num = find_type_by_name(trigger_name, "trigger"); + if (trig_num < 0) { + /* OK try the simpler "-trigger" suffix instead */ + free(trigger_name); + ret = asprintf(&trigger_name, + "%s-trigger", device_name); + if (ret < 0) { + ret = -ENOMEM; + goto error_free_dev_dir_name; + } + } + trig_num = find_type_by_name(trigger_name, "trigger"); if (trig_num < 0) { fprintf(stderr, "Failed to find the trigger %s\n", -- cgit From 2c5ff1f9a6240d7d2d0928021cb76e421d6aacaf Mon Sep 17 00:00:00 2001 From: Peter Meerwald-Stadler Date: Sun, 20 Mar 2016 16:20:22 +0100 Subject: iio: Add modifier for UV light Signed-off-by: Peter Meerwald-Stadler Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio | 4 +++- drivers/iio/industrialio-core.c | 1 + include/uapi/linux/iio/types.h | 1 + tools/iio/iio_event_monitor.c | 2 ++ 4 files changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 17a9210fa6b5..6fb9180e7661 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -1255,12 +1255,14 @@ Description: What: /sys/.../iio:deviceX/in_intensityY_raw What: /sys/.../iio:deviceX/in_intensityY_ir_raw What: /sys/.../iio:deviceX/in_intensityY_both_raw +What: /sys/.../iio:deviceX/in_intensityY_uv_raw KernelVersion: 3.4 Contact: linux-iio@vger.kernel.org Description: Unit-less light intensity. Modifiers both and ir indicate that measurements contains visible and infrared light - components or just infrared light, respectively. + components or just infrared light, respectively. Modifier uv indicates + that measurements contain ultraviolet light components. What: /sys/.../iio:deviceX/in_intensity_red_integration_time What: /sys/.../iio:deviceX/in_intensity_green_integration_time diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 2e768bc99f05..88353ae0ec07 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -101,6 +101,7 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_LIGHT_RED] = "red", [IIO_MOD_LIGHT_GREEN] = "green", [IIO_MOD_LIGHT_BLUE] = "blue", + [IIO_MOD_LIGHT_UV] = "uv", [IIO_MOD_QUATERNION] = "quaternion", [IIO_MOD_TEMP_AMBIENT] = "ambient", [IIO_MOD_TEMP_OBJECT] = "object", diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index c077617f3304..9337eceb9f9d 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -77,6 +77,7 @@ enum iio_modifier { IIO_MOD_Q, IIO_MOD_CO2, IIO_MOD_VOC, + IIO_MOD_LIGHT_UV, }; enum iio_event_type { diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index 90980f586f7a..8d7d9797480b 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -93,6 +93,7 @@ static const char * const iio_modifier_names[] = { [IIO_MOD_LIGHT_RED] = "red", [IIO_MOD_LIGHT_GREEN] = "green", [IIO_MOD_LIGHT_BLUE] = "blue", + [IIO_MOD_LIGHT_UV] = "uv", [IIO_MOD_QUATERNION] = "quaternion", [IIO_MOD_TEMP_AMBIENT] = "ambient", [IIO_MOD_TEMP_OBJECT] = "object", @@ -172,6 +173,7 @@ static bool event_is_known(struct iio_event_data *event) case IIO_MOD_LIGHT_RED: case IIO_MOD_LIGHT_GREEN: case IIO_MOD_LIGHT_BLUE: + case IIO_MOD_LIGHT_UV: case IIO_MOD_QUATERNION: case IIO_MOD_TEMP_AMBIENT: case IIO_MOD_TEMP_OBJECT: -- cgit From d409404cf6e201c2980c7148484c350f33a7912e Mon Sep 17 00:00:00 2001 From: Peter Meerwald-Stadler Date: Sun, 20 Mar 2016 16:20:23 +0100 Subject: iio: Add channel for UV index UV index indicating strength of sunburn-producing ultraviolet (UV) radiation Signed-off-by: Peter Meerwald-Stadler Signed-off-by: Jonathan Cameron --- Documentation/ABI/testing/sysfs-bus-iio | 9 +++++++++ drivers/iio/industrialio-core.c | 1 + include/uapi/linux/iio/types.h | 1 + tools/iio/iio_event_monitor.c | 2 ++ 4 files changed, 13 insertions(+) (limited to 'tools') diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio index 6fb9180e7661..f155eff910f9 100644 --- a/Documentation/ABI/testing/sysfs-bus-iio +++ b/Documentation/ABI/testing/sysfs-bus-iio @@ -1264,6 +1264,15 @@ Description: components or just infrared light, respectively. Modifier uv indicates that measurements contain ultraviolet light components. +What: /sys/.../iio:deviceX/in_uvindex_input +KernelVersion: 4.6 +Contact: linux-iio@vger.kernel.org +Description: + UV light intensity index measuring the human skin's response to + different wavelength of sunlight weighted according to the + standardised CIE Erythemal Action Spectrum. UV index values range + from 0 (low) to >=11 (extreme). + What: /sys/.../iio:deviceX/in_intensity_red_integration_time What: /sys/.../iio:deviceX/in_intensity_green_integration_time What: /sys/.../iio:deviceX/in_intensity_blue_integration_time diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 88353ae0ec07..190a5939fd8c 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -79,6 +79,7 @@ static const char * const iio_chan_type_name_spec[] = { [IIO_CONCENTRATION] = "concentration", [IIO_RESISTANCE] = "resistance", [IIO_PH] = "ph", + [IIO_UVINDEX] = "uvindex", }; static const char * const iio_modifier_names[] = { diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h index 9337eceb9f9d..b0916fc72cce 100644 --- a/include/uapi/linux/iio/types.h +++ b/include/uapi/linux/iio/types.h @@ -38,6 +38,7 @@ enum iio_chan_type { IIO_CONCENTRATION, IIO_RESISTANCE, IIO_PH, + IIO_UVINDEX, }; enum iio_modifier { diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c index 8d7d9797480b..d9b7e0f306c6 100644 --- a/tools/iio/iio_event_monitor.c +++ b/tools/iio/iio_event_monitor.c @@ -56,6 +56,7 @@ static const char * const iio_chan_type_name_spec[] = { [IIO_CONCENTRATION] = "concentration", [IIO_RESISTANCE] = "resistance", [IIO_PH] = "ph", + [IIO_UVINDEX] = "uvindex", }; static const char * const iio_ev_type_text[] = { @@ -147,6 +148,7 @@ static bool event_is_known(struct iio_event_data *event) case IIO_CONCENTRATION: case IIO_RESISTANCE: case IIO_PH: + case IIO_UVINDEX: break; default: return false; -- cgit From baa51277cf5dc844089ea2f6e0f78b1c5ca665d8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 5 Apr 2016 17:40:52 -0700 Subject: libnvdimm, test: add mock SMART data payload Provide simulated SMART data to enable the ndctl implementation of SMART data retrieval and parsing. The payload is defined here, "Section 4.1 SMART and Health Info (Function Index 1)": http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 3 +++ include/uapi/linux/ndctl.h | 36 +++++++++++++++++++++++++++++++- tools/testing/nvdimm/test/nfit.c | 44 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 19f822d7f652..8111b1299515 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -783,6 +783,9 @@ int __init nvdimm_bus_init(void) { int rc; + BUILD_BUG_ON(sizeof(struct nd_smart_payload) != 128); + BUILD_BUG_ON(sizeof(struct nd_smart_threshold_payload) != 8); + rc = bus_register(&nvdimm_bus_type); if (rc) return rc; diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 7cc28ab05b87..59c61e018a86 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015, Intel Corporation. + * Copyright (c) 2014-2016, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU Lesser General Public License, @@ -20,11 +20,45 @@ struct nd_cmd_smart { __u8 data[128]; } __packed; +#define ND_SMART_HEALTH_VALID (1 << 0) +#define ND_SMART_TEMP_VALID (1 << 1) +#define ND_SMART_SPARES_VALID (1 << 2) +#define ND_SMART_ALARM_VALID (1 << 3) +#define ND_SMART_USED_VALID (1 << 4) +#define ND_SMART_SHUTDOWN_VALID (1 << 5) +#define ND_SMART_VENDOR_VALID (1 << 6) +#define ND_SMART_TEMP_TRIP (1 << 0) +#define ND_SMART_SPARE_TRIP (1 << 1) +#define ND_SMART_NON_CRITICAL_HEALTH (1 << 0) +#define ND_SMART_CRITICAL_HEALTH (1 << 1) +#define ND_SMART_FATAL_HEALTH (1 << 2) + +struct nd_smart_payload { + __u32 flags; + __u8 reserved0[4]; + __u8 health; + __u16 temperature; + __u8 spares; + __u8 alarm_flags; + __u8 life_used; + __u8 shutdown_state; + __u8 reserved1; + __u32 vendor_size; + __u8 vendor_data[108]; +} __packed; + struct nd_cmd_smart_threshold { __u32 status; __u8 data[8]; } __packed; +struct nd_smart_threshold_payload { + __u16 alarm_control; + __u16 temperature; + __u8 spares; + __u8 reserved[3]; +} __packed; + struct nd_cmd_dimm_flags { __u32 status; __u32 flags; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 3187322eeed7..d1c98d4386d4 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -330,6 +330,42 @@ static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err, return 0; } +static int nfit_test_cmd_smart(struct nd_cmd_smart *smart, unsigned int buf_len) +{ + static const struct nd_smart_payload smart_data = { + .flags = ND_SMART_HEALTH_VALID | ND_SMART_TEMP_VALID + | ND_SMART_SPARES_VALID | ND_SMART_ALARM_VALID + | ND_SMART_USED_VALID | ND_SMART_SHUTDOWN_VALID, + .health = ND_SMART_NON_CRITICAL_HEALTH, + .temperature = 23 * 16, + .spares = 75, + .alarm_flags = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, + .life_used = 5, + .shutdown_state = 0, + .vendor_size = 0, + }; + + if (buf_len < sizeof(*smart)) + return -EINVAL; + memcpy(smart->data, &smart_data, sizeof(smart_data)); + return 0; +} + +static int nfit_test_cmd_smart_threshold(struct nd_cmd_smart_threshold *smart_t, + unsigned int buf_len) +{ + static const struct nd_smart_threshold_payload smart_t_data = { + .alarm_control = ND_SMART_SPARE_TRIP | ND_SMART_TEMP_TRIP, + .temperature = 40 * 16, + .spares = 5, + }; + + if (buf_len < sizeof(*smart_t)) + return -EINVAL; + memcpy(smart_t->data, &smart_t_data, sizeof(smart_t_data)); + return 0; +} + static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) @@ -368,6 +404,12 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nfit_test_cmd_set_config_data(buf, buf_len, t->label[i]); break; + case ND_CMD_SMART: + rc = nfit_test_cmd_smart(buf, buf_len); + break; + case ND_CMD_SMART_THRESHOLD: + rc = nfit_test_cmd_smart_threshold(buf, buf_len); + break; default: return -ENOTTY; } @@ -1254,10 +1296,12 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); + set_bit(ND_CMD_SMART, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_dsm_force_en); } static void nfit_test1_setup(struct nfit_test *t) -- cgit From 5f991a921a08279f573fa5ecf84261a3a6fac0cc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 14 Apr 2016 10:26:47 +0200 Subject: iio: tools: generic_buffer: auto-enable channels If no channels are enabled when we run generic_buffer on a device, add a command-line option to just enable all of them, run the sampling and disable them all again afterwards. This is extremely useful when I'm low-level testing my sensors with interrupts and triggers, sample session: root@Ux500:/ lsiio Device 000: lsm303dlh_accel Device 001: lis331dl_accel Device 002: l3g4200d Device 003: lsm303dlh_magn Device 004: lps001wp Trigger 000: lsm303dlh_accel-trigger Trigger 001: lis331dl_accel-trigger Trigger 002: l3g4200d-trigger root@Ux500:/ generic_buffer -a -c 10 -n l3g4200d iio device number being used is 2 iio trigger number being used is 2 No channels are enabled, enabling all channels Enabling: in_anglvel_x_en Enabling: in_anglvel_y_en Enabling: in_anglvel_z_en Enabling: in_timestamp_en /sys/bus/iio/devices/iio:device2 l3g4200d-trigger -3.593664 -0.713133 4.870143 946684863662292480 3.225546 0.867357 -4.945878 946684863671875000 -0.676413 0.127296 0.106641 946684863681488037 -0.661113 0.110160 0.128826 946684863690673828 -0.664173 0.113067 0.123471 946684863700683593 -0.664938 0.109395 0.124848 946684863710144042 -0.664173 0.110619 0.130203 946684863719512939 -0.666162 0.111231 0.132651 946684863729125976 -0.668610 0.111690 0.130662 946684863738739013 -0.660501 0.110466 0.131733 946684863748565673 Disabling: in_anglvel_x_en Disabling: in_anglvel_y_en Disabling: in_anglvel_z_en Disabling: in_timestamp_en Pure awesomeness. If some channels have been enabled through scripts or manual interaction, nothing happens. Cc: Peter Meerwald-Stadler Acked-by: Daniel Baluta Signed-off-by: Linus Walleij Signed-off-by: Jonathan Cameron --- tools/iio/generic_buffer.c | 102 +++++++++++++++++++++++++++++++++++++++++++-- tools/iio/iio_utils.h | 7 ++++ 2 files changed, 105 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/iio/generic_buffer.c b/tools/iio/generic_buffer.c index c42b7f836b48..2429c78de940 100644 --- a/tools/iio/generic_buffer.c +++ b/tools/iio/generic_buffer.c @@ -34,6 +34,15 @@ #include #include "iio_utils.h" +/** + * enum autochan - state for the automatic channel enabling mechanism + */ +enum autochan { + AUTOCHANNELS_DISABLED, + AUTOCHANNELS_ENABLED, + AUTOCHANNELS_ACTIVE, +}; + /** * size_from_channelarray() - calculate the storage size of a scan * @channels: the channel info array @@ -191,10 +200,51 @@ void process_scan(char *data, printf("\n"); } +static int enable_disable_all_channels(char *dev_dir_name, int enable) +{ + const struct dirent *ent; + char scanelemdir[256]; + DIR *dp; + int ret; + + snprintf(scanelemdir, sizeof(scanelemdir), + FORMAT_SCAN_ELEMENTS_DIR, dev_dir_name); + scanelemdir[sizeof(scanelemdir)-1] = '\0'; + + dp = opendir(scanelemdir); + if (!dp) { + fprintf(stderr, "Enabling/disabling channels: can't open %s\n", + scanelemdir); + return -EIO; + } + + ret = -ENOENT; + while (ent = readdir(dp), ent) { + if (iioutils_check_suffix(ent->d_name, "_en")) { + printf("%sabling: %s\n", + enable ? "En" : "Dis", + ent->d_name); + ret = write_sysfs_int(ent->d_name, scanelemdir, + enable); + if (ret < 0) + fprintf(stderr, "Failed to enable/disable %s\n", + ent->d_name); + } + } + + if (closedir(dp) == -1) { + perror("Enabling/disabling channels: " + "Failed to close directory"); + return -errno; + } + return 0; +} + void print_usage(void) { fprintf(stderr, "Usage: generic_buffer [options]...\n" "Capture, convert and output data from IIO device buffer\n" + " -a Auto-activate all available channels\n" " -c Do n conversions\n" " -e Disable wait for event (new data)\n" " -g Use trigger-less mode\n" @@ -225,12 +275,16 @@ int main(int argc, char **argv) int scan_size; int noevents = 0; int notrigger = 0; + enum autochan autochannels = AUTOCHANNELS_DISABLED; char *dummy; struct iio_channel_info *channels; - while ((c = getopt(argc, argv, "c:egl:n:t:w:")) != -1) { + while ((c = getopt(argc, argv, "ac:egl:n:t:w:")) != -1) { switch (c) { + case 'a': + autochannels = AUTOCHANNELS_ENABLED; + break; case 'c': errno = 0; num_loops = strtoul(optarg, &dummy, 10); @@ -340,12 +394,47 @@ int main(int argc, char **argv) "diag %s\n", dev_dir_name); goto error_free_triggername; } - if (!num_channels) { + if (num_channels && autochannels == AUTOCHANNELS_ENABLED) { + fprintf(stderr, "Auto-channels selected but some channels " + "are already activated in sysfs\n"); + fprintf(stderr, "Proceeding without activating any channels\n"); + } + + if (!num_channels && autochannels == AUTOCHANNELS_ENABLED) { + fprintf(stderr, + "No channels are enabled, enabling all channels\n"); + + ret = enable_disable_all_channels(dev_dir_name, 1); + if (ret) { + fprintf(stderr, "Failed to enable all channels\n"); + goto error_free_triggername; + } + + /* This flags that we need to disable the channels again */ + autochannels = AUTOCHANNELS_ACTIVE; + + ret = build_channel_array(dev_dir_name, &channels, + &num_channels); + if (ret) { + fprintf(stderr, "Problem reading scan element " + "information\n" + "diag %s\n", dev_dir_name); + goto error_disable_channels; + } + if (!num_channels) { + fprintf(stderr, "Still no channels after " + "auto-enabling, giving up\n"); + goto error_disable_channels; + } + } + + if (!num_channels && autochannels == AUTOCHANNELS_DISABLED) { fprintf(stderr, "No channels are enabled, we have nothing to scan.\n"); fprintf(stderr, "Enable channels manually in " FORMAT_SCAN_ELEMENTS_DIR - "/*_en and try again.\n", dev_dir_name); + "/*_en or pass -a to autoenable channels and " + "try again.\n", dev_dir_name); ret = -ENOENT; goto error_free_triggername; } @@ -479,7 +568,12 @@ error_free_channels: error_free_triggername: if (datardytrigger) free(trigger_name); - +error_disable_channels: + if (autochannels == AUTOCHANNELS_ACTIVE) { + ret = enable_disable_all_channels(dev_dir_name, 0); + if (ret) + fprintf(stderr, "Failed to disable all channels\n"); + } error_free_dev_dir_name: free(dev_dir_name); diff --git a/tools/iio/iio_utils.h b/tools/iio/iio_utils.h index e3503bfe538b..780f2014f8fa 100644 --- a/tools/iio/iio_utils.h +++ b/tools/iio/iio_utils.h @@ -52,6 +52,13 @@ struct iio_channel_info { unsigned location; }; +static inline int iioutils_check_suffix(const char *str, const char *suffix) +{ + return strlen(str) >= strlen(suffix) && + strncmp(str+strlen(str)-strlen(suffix), + suffix, strlen(suffix)) == 0; +} + int iioutils_break_up_name(const char *full_name, char **generic_name); int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used, unsigned *shift, uint64_t *mask, unsigned *be, -- cgit From cfa0963dc474fd098d6a7a722dcecfc143a4b377 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Mar 2016 12:55:00 -0600 Subject: kselftests/ftrace : Add event trigger testcases This adds simple event trigger testcases for ftracetest, which covers following triggers. - traceon-traceoff trigger - enable/disable_event trigger - snapshot trigger - stacktrace trigger - trigger filters Here is the test result. ---- # ./ftracetest test.d/trigger/ === Ftrace unit tests === [1] event trigger - test event enable/disable trigger [PASS] [2] event trigger - test trigger filter [PASS] [3] event trigger - test snapshot-trigger [PASS] [4] event trigger - test stacktrace-trigger [PASS] [5] event trigger - test traceon/off trigger [PASS] # of passed: 5 # of failed: 0 # of unresolved: 0 # of untested: 0 # of unsupported: 0 # of xfailed: 0 # of undefined(test bug): 0 ---- Link: http://lkml.kernel.org/r/12b9c2b289a0dc1e4386e7b77674611a83abca85.1457029949.git.tom.zanussi@linux.intel.com Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Shuah Khan Cc: Namhyung Kim Cc: Tom Zanussi Reviewed-by: Namhyung Kim Signed-off-by: Steven Rostedt --- tools/testing/selftests/ftrace/test.d/functions | 9 +++ .../ftrace/test.d/trigger/trigger-eventonoff.tc | 64 ++++++++++++++++++++++ .../ftrace/test.d/trigger/trigger-filter.tc | 59 ++++++++++++++++++++ .../ftrace/test.d/trigger/trigger-snapshot.tc | 56 +++++++++++++++++++ .../ftrace/test.d/trigger/trigger-stacktrace.tc | 53 ++++++++++++++++++ .../ftrace/test.d/trigger/trigger-traceonoff.tc | 58 ++++++++++++++++++++ 6 files changed, 299 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 5d8cd06d920f..c37262f6c269 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -14,3 +14,12 @@ enable_tracing() { # start trace recording reset_tracer() { # reset the current tracer echo nop > current_tracer } + +reset_trigger() { # reset all current setting triggers + grep -v ^# events/*/*/trigger | + while read line; do + cmd=`echo $line | cut -f2- -d: | cut -f1 -d" "` + echo "!$cmd" > `echo $line | cut -f1 -d:` + done +} + diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc new file mode 100644 index 000000000000..1a9445021bf1 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc @@ -0,0 +1,64 @@ +#!/bin/sh +# description: event trigger - test event enable/disable trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep enable_event events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "event enable/disable trigger is not supported" + exit_unsupported +fi + +echo "Test enable_event trigger" +echo 0 > events/sched/sched_switch/enable +echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat events/sched/sched_switch/enable` != '1*' ]; then + fail "enable_event trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test disable_event trigger" +echo 1 > events/sched/sched_switch/enable +echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat events/sched/sched_switch/enable` != '0*' ]; then + fail "disable_event trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test semantic error for event enable/disable trigger" +! echo 'enable_event:nogroup:noevent' > events/sched/sched_process_fork/trigger +! echo 'disable_event+1' > events/sched/sched_process_fork/trigger +echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger +! echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger +! echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc new file mode 100644 index 000000000000..514e466e198b --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc @@ -0,0 +1,59 @@ +#!/bin/sh +# description: event trigger - test trigger filter + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test trigger filter" +echo 1 > tracing_on +echo 'traceoff if child_pid == 0' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat tracing_on` -ne 1 ]; then + fail "traceoff trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test semantic error for trigger filter" +! echo 'traceoff if a' > events/sched/sched_process_fork/trigger +! echo 'traceoff if common_pid=0' > events/sched/sched_process_fork/trigger +! echo 'traceoff if common_pid==b' > events/sched/sched_process_fork/trigger +echo 'traceoff if common_pid == 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger +! echo 'traceoff if common_pid == child_pid' > events/sched/sched_process_fork/trigger +echo 'traceoff if common_pid <= 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger +echo 'traceoff if common_pid >= 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger +echo 'traceoff if parent_pid >= 0 && child_pid >= 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger +echo 'traceoff if parent_pid >= 0 || child_pid >= 0' > events/sched/sched_process_fork/trigger +echo '!traceoff' > events/sched/sched_process_fork/trigger + + + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc new file mode 100644 index 000000000000..f84b80d551a2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc @@ -0,0 +1,56 @@ +#!/bin/sh +# description: event trigger - test snapshot-trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep snapshot events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "snapshot trigger is not supported" + exit_unsupported +fi + +echo "Test snapshot tigger" +echo 0 > snapshot +echo 1 > events/sched/sched_process_fork/enable +( echo "forked") +echo 'snapshot:1' > events/sched/sched_process_fork/trigger +( echo "forked") +grep sched_process_fork snapshot > /dev/null || \ + fail "snapshot trigger on sched_process_fork did not work" + +reset_trigger +echo 0 > snapshot +echo 0 > events/sched/sched_process_fork/enable + +echo "Test snapshot semantic errors" + +! echo "snapshot+1" > events/sched/sched_process_fork/trigger +echo "snapshot" > events/sched/sched_process_fork/trigger +! echo "snapshot" > events/sched/sched_process_fork/trigger + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc new file mode 100644 index 000000000000..9fa23b085def --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc @@ -0,0 +1,53 @@ +#!/bin/sh +# description: event trigger - test stacktrace-trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep stacktrace events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "stacktrace trigger is not supported" + exit_unsupported +fi + +echo "Test stacktrace tigger" +echo 0 > trace +echo 0 > options/stacktrace +echo 'stacktrace' > events/sched/sched_process_fork/trigger +( echo "forked") +grep "" trace > /dev/null || \ + fail "stacktrace trigger on sched_process_fork did not work" + +reset_trigger + +echo "Test stacktrace semantic errors" + +! echo "stacktrace:foo" > events/sched/sched_process_fork/trigger +echo "stacktrace" > events/sched/sched_process_fork/trigger +! echo "stacktrace" > events/sched/sched_process_fork/trigger + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc new file mode 100644 index 000000000000..87648e5f987c --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc @@ -0,0 +1,58 @@ +#!/bin/sh +# description: event trigger - test traceon/off trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test traceoff trigger" +echo 1 > tracing_on +echo 'traceoff' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat tracing_on` -ne 0 ]; then + fail "traceoff trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test traceon trigger" +echo 0 > tracing_on +echo 'traceon' > events/sched/sched_process_fork/trigger +( echo "forked") +if [ `cat tracing_on` -ne 1 ]; then + fail "traceoff trigger on sched_process_fork did not work" +fi + +reset_trigger + +echo "Test semantic error for traceoff/on trigger" +! echo 'traceoff:badparam' > events/sched/sched_process_fork/trigger +! echo 'traceoff+0' > events/sched/sched_process_fork/trigger +echo 'traceon' > events/sched/sched_process_fork/trigger +! echo 'traceon' > events/sched/sched_process_fork/trigger +! echo 'traceoff' > events/sched/sched_process_fork/trigger + +do_reset + +exit 0 -- cgit From 76929ab51f0ee398bcf72286c4b377051f07a31b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Mar 2016 12:55:01 -0600 Subject: kselftests/ftrace: Add hist trigger testcases Add the hist trigger testcases for ftracetest. This checks the basic histogram trigger behaviors like as; - Histogram trigger itself - Histogram with string key - Histogram with compound keys - Histogram with sort key - Histogram trigger modifiers (execname, hex, syscall) - Multiple histograms on an event - Named histogram - Named histogram on multi events Here is the test result. ---- # ./ftracetest test.d/trigger/*hist*.tc === Ftrace unit tests === [1] event trigger - test histogram modifiers [PASS] [2] event trigger - test histogram trigger [PASS] [3] event trigger - test multiple histogram triggers [PASS] # of passed: 3 # of failed: 0 # of unresolved: 0 # of untested: 0 # of unsupported: 0 # of xfailed: 0 # of undefined(test bug): 0 ---- Link: http://lkml.kernel.org/r/17cb3a3d9eeadc3282645147905455a298e7fbeb.1457029949.git.tom.zanussi@linux.intel.com Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Shuah Khan Cc: Namhyung Kim Cc: Tom Zanussi Signed-off-by: Tom Zanussi [Tom Zanussi: Change multihist test from truncate ('>') to append ('>>')] Reviewed-by: Namhyung Kim Signed-off-by: Steven Rostedt --- .../ftrace/test.d/trigger/trigger-hist-mod.tc | 65 +++++++++++++++++ .../ftrace/test.d/trigger/trigger-hist.tc | 83 ++++++++++++++++++++++ .../ftrace/test.d/trigger/trigger-multihist.tc | 73 +++++++++++++++++++ 3 files changed, 221 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc new file mode 100644 index 000000000000..57e350a0bcca --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -0,0 +1,65 @@ +#!/bin/sh +# description: event trigger - test histogram modifiers + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep hist events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +echo "Test histogram with execname modifier" + +echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +COMM=`cat /proc/$$/comm` +grep "common_pid: $COMM" events/sched/sched_process_fork/hist > /dev/null || \ + fail "execname modifier on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with hex modifier" + +echo 'hist:keys=parent_pid.hex' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +# Note that $$ is the parent pid. $PID is current PID. +HEX=`printf %x $PID` +grep "parent_pid: $HEX" events/sched/sched_process_fork/hist > /dev/null || \ + fail "hex modifier on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with syscall modifier" + +echo 'hist:keys=id.syscall' > events/raw_syscalls/sys_exit/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep "id: sys_" events/raw_syscalls/sys_exit/hist > /dev/null || \ + fail "syscall modifier on raw_syscalls/sys_exit did not work" + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc new file mode 100644 index 000000000000..b2902d42a537 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc @@ -0,0 +1,83 @@ +#!/bin/sh +# description: event trigger - test histogram trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep hist events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +echo "Test histogram basic tigger" + +echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep parent_pid events/sched/sched_process_fork/hist > /dev/null || \ + fail "hist trigger on sched_process_fork did not work" +grep child events/sched/sched_process_fork/hist > /dev/null || \ + fail "hist trigger on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with compound keys" + +echo 'hist:keys=parent_pid,child_pid' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep '^{ parent_pid:.*, child_pid:.*}' events/sched/sched_process_fork/hist > /dev/null || \ + fail "compound keys on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with string key" + +echo 'hist:keys=parent_comm' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +COMM=`cat /proc/$$/comm` +grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \ + fail "string key on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with sort key" + +echo 'hist:keys=parent_pid,child_pid:sort=child_pid.ascending' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done + +check_inc() { + while [ $# -gt 1 ]; do + [ $1 -gt $2 ] && return 1 + shift 1 + done + return 0 +} +check_inc `grep -o "child_pid:[[:space:]]*[[:digit:]]*" \ + events/sched/sched_process_fork/hist | cut -d: -f2 ` || + fail "sort param on sched_process_fork did not work" + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc new file mode 100644 index 000000000000..03c4a46561fc --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc @@ -0,0 +1,73 @@ +#!/bin/sh +# description: event trigger - test multiple histogram triggers + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/sched/sched_process_fork/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +FEATURE=`grep hist events/sched/sched_process_fork/trigger` +if [ -z "$FEATURE" ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +reset_trigger + +echo "Test histogram multiple tiggers" + +echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger +echo 'hist:keys=parent_comm:vals=child_pid' >> events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep parent_pid events/sched/sched_process_fork/hist > /dev/null || \ + fail "hist trigger on sched_process_fork did not work" +grep child events/sched/sched_process_fork/hist > /dev/null || \ + fail "hist trigger on sched_process_fork did not work" +COMM=`cat /proc/$$/comm` +grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \ + fail "string key on sched_process_fork did not work" + +reset_trigger + +echo "Test histogram with its name" + +echo 'hist:name=test_hist:keys=common_pid' > events/sched/sched_process_fork/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep test_hist events/sched/sched_process_fork/hist > /dev/null || \ + fail "named event on sched_process_fork did not work" + +echo "Test same named histogram on different events" + +echo 'hist:name=test_hist:keys=common_pid' > events/sched/sched_process_exit/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep test_hist events/sched/sched_process_exit/hist > /dev/null || \ + fail "named event on sched_process_fork did not work" + +diffs=`diff events/sched/sched_process_exit/hist events/sched/sched_process_fork/hist | wc -l` +test $diffs -eq 0 || fail "Same name histograms are not same" + +reset_trigger + +do_reset + +exit 0 -- cgit From 93c5f671f25cb7a73492b7a96b426c4fb1efa715 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 3 Mar 2016 12:55:03 -0600 Subject: kselftests/ftrace: Add a test for log2 modifier of hist trigger Add a test for log2 modifier of hist trigger in hist_mod.tc. Here is the test result. ---- # ./ftracetest test.d/trigger/trigger-hist-mod.tc === Ftrace unit tests === [1] event trigger - test histogram modifiers [PASS] # of passed: 1 # of failed: 0 # of unresolved: 0 # of untested: 0 # of unsupported: 0 # of xfailed: 0 # of undefined(test bug): 0 ---- Link: http://lkml.kernel.org/r/3f1ab735c06a50b1b40d3e96b8b6a3e5ea62fd86.1457029949.git.tom.zanussi@linux.intel.com Signed-off-by: Masami Hiramatsu Cc: Ingo Molnar Cc: Shuah Khan Cc: Namhyung Kim Cc: Tom Zanussi Tested-by: Tom Zanussi Signed-off-by: Steven Rostedt --- .../selftests/ftrace/test.d/trigger/trigger-hist-mod.tc | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc index 57e350a0bcca..c2b61c4fda11 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -60,6 +60,16 @@ for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done grep "id: sys_" events/raw_syscalls/sys_exit/hist > /dev/null || \ fail "syscall modifier on raw_syscalls/sys_exit did not work" + +reset_trigger + +echo "Test histgram with log2 modifier" + +echo 'hist:keys=bytes_req.log2' > events/kmem/kmalloc/trigger +for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done +grep 'bytes_req: ~ 2^[0-9]*' events/kmem/kmalloc/hist > /dev/null || \ + fail "log2 modifier on kmem/kmalloc did not work" + do_reset exit 0 -- cgit From 23d43848708afd7aa9a2c8516a3f269a3e71be4f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 8 Apr 2016 11:24:47 +0900 Subject: kbuild: rename cmd_cc_i_c to cmd_cpp_i_c This command just preprocesses .c files into .i files, so cmd_cpp_i_c seems more suitable. Signed-off-by: Masahiro Yamada Acked-by: Arnaldo Carvalho de Melo Signed-off-by: Michal Marek --- scripts/Makefile.build | 6 +++--- tools/build/Makefile.build | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 12821d9eae85..13f606ba2a3b 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -152,11 +152,11 @@ cmd_cc_s_c = $(CC) $(c_flags) $(DISABLE_LTO) -fverbose-asm -S -o $@ $< $(obj)/%.s: $(src)/%.c FORCE $(call if_changed_dep,cc_s_c) -quiet_cmd_cc_i_c = CPP $(quiet_modtag) $@ -cmd_cc_i_c = $(CPP) $(c_flags) -o $@ $< +quiet_cmd_cpp_i_c = CPP $(quiet_modtag) $@ +cmd_cpp_i_c = $(CPP) $(c_flags) -o $@ $< $(obj)/%.i: $(src)/%.c FORCE - $(call if_changed_dep,cc_i_c) + $(call if_changed_dep,cpp_i_c) cmd_gensymtypes = \ $(CPP) -D__GENKSYMS__ $(c_flags) $< | \ diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build index ee566e8bd1cf..27f3583193e6 100644 --- a/tools/build/Makefile.build +++ b/tools/build/Makefile.build @@ -58,8 +58,8 @@ quiet_cmd_mkdir = MKDIR $(dir $@) quiet_cmd_cc_o_c = CC $@ cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< -quiet_cmd_cc_i_c = CPP $@ - cmd_cc_i_c = $(CC) $(c_flags) -E -o $@ $< +quiet_cmd_cpp_i_c = CPP $@ + cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $< quiet_cmd_cc_s_c = AS $@ cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< @@ -83,11 +83,11 @@ $(OUTPUT)%.o: %.S FORCE $(OUTPUT)%.i: %.c FORCE $(call rule_mkdir) - $(call if_changed_dep,cc_i_c) + $(call if_changed_dep,cpp_i_c) $(OUTPUT)%.s: %.S FORCE $(call rule_mkdir) - $(call if_changed_dep,cc_i_c) + $(call if_changed_dep,cpp_i_c) $(OUTPUT)%.s: %.c FORCE $(call rule_mkdir) -- cgit From dc642e8388b63a9a221903584bb6b562e5055d0d Mon Sep 17 00:00:00 2001 From: Anju T Date: Sat, 20 Feb 2016 10:32:47 +0530 Subject: tools/perf: Map the ID values with register names Map ID values with corresponding register names. These names are then displayed when user issues perf record with the -I option followed by perf report/script with -D option. To test this patchset, Eg: $ perf record -I ls # record machine state at interrupt $ perf script -D # read the perf.data file Sample output obtained for this patch / output looks like as follows: 496768515470 0x1988 [0x188]: PERF_RECORD_SAMPLE(IP, 0x1): 4522/4522: 0xc0000000001e538c period: 1 addr: 0 ... intr regs: mask 0x7ffffffffff ABI 64-bit .... r0 0xc0000000001e5e34 .... r1 0xc000000fe733f9a0 .... r2 0xc000000001523100 .... r3 0xc000000ffaadeb60 .... r4 0xc000000003456800 .... r5 0x73a9b5e000 .... r6 0x1e000000 .... r7 0x0 .... r8 0x0 .... r9 0x0 .... r10 0x1 .... r11 0x0 .... r12 0x24022822 .... r13 0xc00000000feec180 .... r14 0x0 .... r15 0xc000001e4be18800 .... r16 0x0 .... r17 0xc000000ffaac5000 .... r18 0xc000000fe733f8a0 .... r19 0xc000000001523100 .... r20 0xc00000000009fd1c .... r21 0xc000000fcaa69000 .... r22 0xc0000000001e4968 .... r23 0xc000000001523100 .... r24 0xc000000fe733f850 .... r25 0xc000000fcaa69000 .... r26 0xc000000003b8fcf0 .... r27 0xfffffffffffffead .... r28 0x0 .... r29 0xc000000fcaa69000 .... r30 0x1 .... r31 0x0 .... nip 0xc0000000001dd320 .... msr 0x9000000000009032 .... orig_r3 0xc0000000001e538c .... ctr 0xc00000000009d550 .... link 0xc0000000001e5e34 .... xer 0x0 .... ccr 0x84022882 .... softe 0x0 .... trap 0xf01 .... dar 0x0 .... dsisr 0xf00040060000004 ... thread: :4522:4522 ...... dso: /root/.debug/.build-id/b0/ef11b1a1629e62ac9de75199117ee5ef9469e9 :4522 4522 496.768515: 1 cycles: c0000000001e538c .perf_event_context_sched_in (/boot/vmlinux) Signed-off-by: Anju T Acked-by: Arnaldo Carvalho de Melo Signed-off-by: Michael Ellerman --- tools/perf/arch/powerpc/include/perf_regs.h | 69 +++++++++++++++++++++++++++++ tools/perf/config/Makefile | 5 +++ 2 files changed, 74 insertions(+) create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h (limited to 'tools') diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h new file mode 100644 index 000000000000..75de0e92e71e --- /dev/null +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -0,0 +1,69 @@ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include +#include +#include + +#define PERF_REGS_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) +#define PERF_REGS_MAX PERF_REG_POWERPC_MAX +#ifdef __powerpc64__ + #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64 +#else + #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32 +#endif + +#define PERF_REG_IP PERF_REG_POWERPC_NIP +#define PERF_REG_SP PERF_REG_POWERPC_R1 + +static const char *reg_names[] = { + [PERF_REG_POWERPC_R0] = "r0", + [PERF_REG_POWERPC_R1] = "r1", + [PERF_REG_POWERPC_R2] = "r2", + [PERF_REG_POWERPC_R3] = "r3", + [PERF_REG_POWERPC_R4] = "r4", + [PERF_REG_POWERPC_R5] = "r5", + [PERF_REG_POWERPC_R6] = "r6", + [PERF_REG_POWERPC_R7] = "r7", + [PERF_REG_POWERPC_R8] = "r8", + [PERF_REG_POWERPC_R9] = "r9", + [PERF_REG_POWERPC_R10] = "r10", + [PERF_REG_POWERPC_R11] = "r11", + [PERF_REG_POWERPC_R12] = "r12", + [PERF_REG_POWERPC_R13] = "r13", + [PERF_REG_POWERPC_R14] = "r14", + [PERF_REG_POWERPC_R15] = "r15", + [PERF_REG_POWERPC_R16] = "r16", + [PERF_REG_POWERPC_R17] = "r17", + [PERF_REG_POWERPC_R18] = "r18", + [PERF_REG_POWERPC_R19] = "r19", + [PERF_REG_POWERPC_R20] = "r20", + [PERF_REG_POWERPC_R21] = "r21", + [PERF_REG_POWERPC_R22] = "r22", + [PERF_REG_POWERPC_R23] = "r23", + [PERF_REG_POWERPC_R24] = "r24", + [PERF_REG_POWERPC_R25] = "r25", + [PERF_REG_POWERPC_R26] = "r26", + [PERF_REG_POWERPC_R27] = "r27", + [PERF_REG_POWERPC_R28] = "r28", + [PERF_REG_POWERPC_R29] = "r29", + [PERF_REG_POWERPC_R30] = "r30", + [PERF_REG_POWERPC_R31] = "r31", + [PERF_REG_POWERPC_NIP] = "nip", + [PERF_REG_POWERPC_MSR] = "msr", + [PERF_REG_POWERPC_ORIG_R3] = "orig_r3", + [PERF_REG_POWERPC_CTR] = "ctr", + [PERF_REG_POWERPC_LINK] = "link", + [PERF_REG_POWERPC_XER] = "xer", + [PERF_REG_POWERPC_CCR] = "ccr", + [PERF_REG_POWERPC_SOFTE] = "softe", + [PERF_REG_POWERPC_TRAP] = "trap", + [PERF_REG_POWERPC_DAR] = "dar", + [PERF_REG_POWERPC_DSISR] = "dsisr" +}; + +static inline const char *perf_reg_name(int id) +{ + return reg_names[id]; +} +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f7d7f5a1cad5..eda1cfbbfb2d 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -23,6 +23,11 @@ $(call detected_var,ARCH) NO_PERF_REGS := 1 +# Additional ARCH settings for ppc +ifeq ($(ARCH),powerpc) + NO_PERF_REGS := 0 +endif + # Additional ARCH settings for x86 ifeq ($(ARCH),x86) $(call detected,CONFIG_X86) -- cgit From bb62bad623deb7952aef13fcbe065fde5d1f03e5 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Sat, 20 Feb 2016 10:32:48 +0530 Subject: tool/perf: Add sample_reg_mask to include all perf_regs Add sample_reg_mask array with pt_regs registers. This is needed for printing supported regs ( -I? option). Signed-off-by: Madhavan Srinivasan Acked-by: Arnaldo Carvalho de Melo Signed-off-by: Michael Ellerman --- tools/perf/arch/powerpc/util/Build | 1 + tools/perf/arch/powerpc/util/perf_regs.c | 49 ++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c (limited to 'tools') diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index c8fe2074d217..9ee335016592 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,6 +1,7 @@ libperf-y += header.o libperf-y += sym-handling.o libperf-y += kvm-stat.o +libperf-y += perf_regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c new file mode 100644 index 000000000000..a3c3e1ce6807 --- /dev/null +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -0,0 +1,49 @@ +#include "../../perf.h" +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG(r0, PERF_REG_POWERPC_R0), + SMPL_REG(r1, PERF_REG_POWERPC_R1), + SMPL_REG(r2, PERF_REG_POWERPC_R2), + SMPL_REG(r3, PERF_REG_POWERPC_R3), + SMPL_REG(r4, PERF_REG_POWERPC_R4), + SMPL_REG(r5, PERF_REG_POWERPC_R5), + SMPL_REG(r6, PERF_REG_POWERPC_R6), + SMPL_REG(r7, PERF_REG_POWERPC_R7), + SMPL_REG(r8, PERF_REG_POWERPC_R8), + SMPL_REG(r9, PERF_REG_POWERPC_R9), + SMPL_REG(r10, PERF_REG_POWERPC_R10), + SMPL_REG(r11, PERF_REG_POWERPC_R11), + SMPL_REG(r12, PERF_REG_POWERPC_R12), + SMPL_REG(r13, PERF_REG_POWERPC_R13), + SMPL_REG(r14, PERF_REG_POWERPC_R14), + SMPL_REG(r15, PERF_REG_POWERPC_R15), + SMPL_REG(r16, PERF_REG_POWERPC_R16), + SMPL_REG(r17, PERF_REG_POWERPC_R17), + SMPL_REG(r18, PERF_REG_POWERPC_R18), + SMPL_REG(r19, PERF_REG_POWERPC_R19), + SMPL_REG(r20, PERF_REG_POWERPC_R20), + SMPL_REG(r21, PERF_REG_POWERPC_R21), + SMPL_REG(r22, PERF_REG_POWERPC_R22), + SMPL_REG(r23, PERF_REG_POWERPC_R23), + SMPL_REG(r24, PERF_REG_POWERPC_R24), + SMPL_REG(r25, PERF_REG_POWERPC_R25), + SMPL_REG(r26, PERF_REG_POWERPC_R26), + SMPL_REG(r27, PERF_REG_POWERPC_R27), + SMPL_REG(r28, PERF_REG_POWERPC_R28), + SMPL_REG(r29, PERF_REG_POWERPC_R29), + SMPL_REG(r30, PERF_REG_POWERPC_R30), + SMPL_REG(r31, PERF_REG_POWERPC_R31), + SMPL_REG(nip, PERF_REG_POWERPC_NIP), + SMPL_REG(msr, PERF_REG_POWERPC_MSR), + SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3), + SMPL_REG(ctr, PERF_REG_POWERPC_CTR), + SMPL_REG(link, PERF_REG_POWERPC_LINK), + SMPL_REG(xer, PERF_REG_POWERPC_XER), + SMPL_REG(ccr, PERF_REG_POWERPC_CCR), + SMPL_REG(softe, PERF_REG_POWERPC_SOFTE), + SMPL_REG(trap, PERF_REG_POWERPC_TRAP), + SMPL_REG(dar, PERF_REG_POWERPC_DAR), + SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR), + SMPL_REG_END +}; -- cgit From 200c79da824c978fcf6eec1dc9c0a1e521133267 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 22 Mar 2016 00:22:16 -0700 Subject: libnvdimm, pmem, pfn: make pmem_rw_bytes generic and refactor pfn setup In preparation for providing an alternative (to block device) access mechanism to persistent memory, convert pmem_rw_bytes() to nsio_rw_bytes(). This allows ->rw_bytes() functionality without requiring a 'struct pmem_device' to be instantiated. In other words, when ->rw_bytes() is in use i/o is driven through 'struct nd_namespace_io', otherwise it is driven through 'struct pmem_device' and the block layer. This consolidates the disjoint calls to devm_exit_badblocks() and devm_memunmap() into a common devm_nsio_disable() and cleans up the init path to use a unified pmem_attach_disk() implementation. Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- drivers/nvdimm/blk.c | 2 +- drivers/nvdimm/btt_devs.c | 4 +- drivers/nvdimm/claim.c | 61 ++++++++++ drivers/nvdimm/nd.h | 40 +++++-- drivers/nvdimm/pfn_devs.c | 4 +- drivers/nvdimm/pmem.c | 236 ++++++++++++++------------------------ include/linux/nd.h | 9 +- tools/testing/nvdimm/Kbuild | 1 + tools/testing/nvdimm/test/iomap.c | 27 +++-- 9 files changed, 211 insertions(+), 173 deletions(-) (limited to 'tools') diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 4c14ecdc792b..495e06d9f7e7 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -324,7 +324,7 @@ static int nd_blk_probe(struct device *dev) ndns->rw_bytes = nsblk_rw_bytes; if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); - else if (nd_btt_probe(dev, ndns, nsblk) == 0) { + else if (nd_btt_probe(dev, ndns) == 0) { /* we'll come back as btt-blk */ return -ENXIO; } else diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 1886171af80e..816d0dae6398 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -273,8 +273,7 @@ static int __nd_btt_probe(struct nd_btt *nd_btt, return 0; } -int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata) +int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns) { int rc; struct device *btt_dev; @@ -289,7 +288,6 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns, nvdimm_bus_unlock(&ndns->dev); if (!btt_dev) return -ENOMEM; - dev_set_drvdata(btt_dev, drvdata); btt_sb = devm_kzalloc(dev, sizeof(*btt_sb), GFP_KERNEL); rc = __nd_btt_probe(to_nd_btt(btt_dev), ndns, btt_sb); dev_dbg(dev, "%s: btt: %s\n", __func__, diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index e8f03b0e95e4..6bbd0a36994a 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -12,6 +12,7 @@ */ #include #include +#include #include "nd-core.h" #include "pfn.h" #include "btt.h" @@ -199,3 +200,63 @@ u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb) return sum; } EXPORT_SYMBOL(nd_sb_checksum); + +static int nsio_rw_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size, int rw) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + + if (unlikely(offset + size > nsio->size)) { + dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); + return -EFAULT; + } + + if (rw == READ) { + unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512); + + if (unlikely(is_bad_pmem(&nsio->bb, offset / 512, sz_align))) + return -EIO; + return memcpy_from_pmem(buf, nsio->addr + offset, size); + } else { + memcpy_to_pmem(nsio->addr + offset, buf, size); + wmb_pmem(); + } + + return 0; +} + +int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio) +{ + struct resource *res = &nsio->res; + struct nd_namespace_common *ndns = &nsio->common; + + nsio->size = resource_size(res); + if (!devm_request_mem_region(dev, res->start, resource_size(res), + dev_name(dev))) { + dev_warn(dev, "could not reserve region %pR\n", res); + return -EBUSY; + } + + ndns->rw_bytes = nsio_rw_bytes; + if (devm_init_badblocks(dev, &nsio->bb)) + return -ENOMEM; + nvdimm_badblocks_populate(to_nd_region(ndns->dev.parent), &nsio->bb, + &nsio->res); + + nsio->addr = devm_memremap(dev, res->start, resource_size(res), + ARCH_MEMREMAP_PMEM); + if (IS_ERR(nsio->addr)) + return PTR_ERR(nsio->addr); + return 0; +} +EXPORT_SYMBOL_GPL(devm_nsio_enable); + +void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio) +{ + struct resource *res = &nsio->res; + + devm_memunmap(dev, nsio->addr); + devm_exit_badblocks(dev, &nsio->bb); + devm_release_mem_region(dev, res->start, resource_size(res)); +} +EXPORT_SYMBOL_GPL(devm_nsio_disable); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 0fb14890ba26..10e23fe49012 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -13,6 +13,7 @@ #ifndef __ND_H__ #define __ND_H__ #include +#include #include #include #include @@ -197,13 +198,12 @@ struct nd_gen_sb { u64 nd_sb_checksum(struct nd_gen_sb *sb); #if IS_ENABLED(CONFIG_BTT) -int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata); +int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_btt(struct device *dev); struct device *nd_btt_create(struct nd_region *nd_region); #else static inline int nd_btt_probe(struct device *dev, - struct nd_namespace_common *ndns, void *drvdata) + struct nd_namespace_common *ndns) { return -ENODEV; } @@ -221,14 +221,13 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region) struct nd_pfn *to_nd_pfn(struct device *dev); #if IS_ENABLED(CONFIG_NVDIMM_PFN) -int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata); +int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_pfn(struct device *dev); struct device *nd_pfn_create(struct nd_region *nd_region); int nd_pfn_validate(struct nd_pfn *nd_pfn); #else -static inline int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata) +static inline int nd_pfn_probe(struct device *dev, + struct nd_namespace_common *ndns) { return -ENODEV; } @@ -272,6 +271,20 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name); void nvdimm_badblocks_populate(struct nd_region *nd_region, struct badblocks *bb, const struct resource *res); +#if IS_ENABLED(CONFIG_ND_CLAIM) +int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio); +void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio); +#else +static inline int devm_nsio_enable(struct device *dev, + struct nd_namespace_io *nsio) +{ + return -ENXIO; +} +static inline void devm_nsio_disable(struct device *dev, + struct nd_namespace_io *nsio) +{ +} +#endif int nd_blk_region_init(struct nd_region *nd_region); void __nd_iostat_start(struct bio *bio, unsigned long *start); static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) @@ -285,6 +298,19 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) return true; } void nd_iostat_end(struct bio *bio, unsigned long start); +static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector, + unsigned int len) +{ + if (bb->count) { + sector_t first_bad; + int num_bad; + + return !!badblocks_check(bb, sector, len / 512, &first_bad, + &num_bad); + } + + return false; +} resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); const u8 *nd_dev_to_uuid(struct device *dev); bool pmem_should_map_pages(struct device *dev); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 96aa5490c279..9df081ae96e3 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -410,8 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) } EXPORT_SYMBOL(nd_pfn_validate); -int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata) +int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) { int rc; struct nd_pfn *nd_pfn; @@ -427,7 +426,6 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, nvdimm_bus_unlock(&ndns->dev); if (!pfn_dev) return -ENOMEM; - dev_set_drvdata(pfn_dev, drvdata); pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); nd_pfn = to_nd_pfn(pfn_dev); nd_pfn->pfn_sb = pfn_sb; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 67d48e2e8ca2..b5f81b02205c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -49,19 +49,6 @@ struct pmem_device { struct badblocks bb; }; -static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) -{ - if (bb->count) { - sector_t first_bad; - int num_bad; - - return !!badblocks_check(bb, sector, len / 512, &first_bad, - &num_bad); - } - - return false; -} - static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, unsigned int len) { @@ -209,16 +196,40 @@ void pmem_release_disk(void *disk) put_disk(disk); } -static struct pmem_device *pmem_alloc(struct device *dev, - struct resource *res, int id) +static struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct resource *res, struct vmem_altmap *altmap); + +static int pmem_attach_disk(struct device *dev, + struct nd_namespace_common *ndns) { + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct vmem_altmap __altmap, *altmap = NULL; + struct resource *res = &nsio->res; + struct nd_pfn *nd_pfn = NULL; + int nid = dev_to_node(dev); + struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; + struct resource pfn_res; struct request_queue *q; + struct gendisk *disk; + void *addr; + + /* while nsio_rw_bytes is active, parse a pfn info block if present */ + if (is_nd_pfn(dev)) { + nd_pfn = to_nd_pfn(dev); + altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap); + if (IS_ERR(altmap)) + return PTR_ERR(altmap); + } + + /* we're attaching a block device, disable raw namespace access */ + devm_nsio_disable(dev, nsio); pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); if (!pmem) - return ERR_PTR(-ENOMEM); + return -ENOMEM; + dev_set_drvdata(dev, pmem); pmem->phys_addr = res->start; pmem->size = resource_size(res); if (!arch_has_wmb_pmem()) @@ -227,22 +238,31 @@ static struct pmem_device *pmem_alloc(struct device *dev, if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(dev))) { dev_warn(dev, "could not reserve region %pR\n", res); - return ERR_PTR(-EBUSY); + return -EBUSY; } q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); if (!q) - return ERR_PTR(-ENOMEM); + return -ENOMEM; + pmem->pmem_queue = q; pmem->pfn_flags = PFN_DEV; - if (pmem_should_map_pages(dev)) { - pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res, + if (is_nd_pfn(dev)) { + addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter, + altmap); + pfn_sb = nd_pfn->pfn_sb; + pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); + pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res); + pmem->pfn_flags |= PFN_MAP; + res = &pfn_res; /* for badblocks populate */ + res->start += pmem->data_offset; + } else if (pmem_should_map_pages(dev)) { + addr = devm_memremap_pages(dev, &nsio->res, &q->q_usage_counter, NULL); pmem->pfn_flags |= PFN_MAP; } else - pmem->virt_addr = (void __pmem *) devm_memremap(dev, - pmem->phys_addr, pmem->size, - ARCH_MEMREMAP_PMEM); + addr = devm_memremap(dev, pmem->phys_addr, + pmem->size, ARCH_MEMREMAP_PMEM); /* * At release time the queue must be dead before @@ -250,23 +270,12 @@ static struct pmem_device *pmem_alloc(struct device *dev, */ if (devm_add_action(dev, pmem_release_queue, q)) { blk_cleanup_queue(q); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } - if (IS_ERR(pmem->virt_addr)) - return (void __force *) pmem->virt_addr; - - pmem->pmem_queue = q; - return pmem; -} - -static int pmem_attach_disk(struct device *dev, - struct nd_namespace_common *ndns, struct pmem_device *pmem) -{ - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - int nid = dev_to_node(dev); - struct resource bb_res; - struct gendisk *disk; + if (IS_ERR(addr)) + return PTR_ERR(addr); + pmem->virt_addr = (void __pmem *) addr; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE); @@ -291,20 +300,9 @@ static int pmem_attach_disk(struct device *dev, set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) / 512); pmem->pmem_disk = disk; - devm_exit_badblocks(dev, &pmem->bb); if (devm_init_badblocks(dev, &pmem->bb)) return -ENOMEM; - bb_res.start = nsio->res.start + pmem->data_offset; - bb_res.end = nsio->res.end; - if (is_nd_pfn(dev)) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); - struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; - - bb_res.start += __le32_to_cpu(pfn_sb->start_pad); - bb_res.end -= __le32_to_cpu(pfn_sb->end_trunc); - } - nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, - &bb_res); + nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res); disk->bb = &pmem->bb; add_disk(disk); revalidate_disk(disk); @@ -312,33 +310,8 @@ static int pmem_attach_disk(struct device *dev, return 0; } -static int pmem_rw_bytes(struct nd_namespace_common *ndns, - resource_size_t offset, void *buf, size_t size, int rw) -{ - struct pmem_device *pmem = dev_get_drvdata(ndns->claim); - - if (unlikely(offset + size > pmem->size)) { - dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); - return -EFAULT; - } - - if (rw == READ) { - unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512); - - if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align))) - return -EIO; - return memcpy_from_pmem(buf, pmem->virt_addr + offset, size); - } else { - memcpy_to_pmem(pmem->virt_addr + offset, buf, size); - wmb_pmem(); - } - - return 0; -} - static int nd_pfn_init(struct nd_pfn *nd_pfn) { - struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev); struct nd_namespace_common *ndns = nd_pfn->ndns; u32 start_pad = 0, end_trunc = 0; resource_size_t start, size; @@ -404,7 +377,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * ->direct_access() to those that are included in the memmap. */ start += start_pad; - npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K; + size = resource_size(&nsio->res); + npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K; if (nd_pfn->mode == PFN_MODE_PMEM) offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align) - start; @@ -413,13 +387,13 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) else return -ENXIO; - if (offset + start_pad + end_trunc >= pmem->size) { + if (offset + start_pad + end_trunc >= size) { dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", dev_name(&ndns->dev)); return -ENXIO; } - npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K; + npfns = (size - offset - start_pad - end_trunc) / SZ_4K; pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); @@ -456,17 +430,14 @@ static unsigned long init_altmap_reserve(resource_size_t base) return reserve; } -static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) +static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct resource *res, struct vmem_altmap *altmap) { - struct resource res; - struct request_queue *q; - struct pmem_device *pmem; - struct vmem_altmap *altmap; - struct device *dev = &nd_pfn->dev; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; - struct nd_namespace_common *ndns = nd_pfn->ndns; + u64 offset = le64_to_cpu(pfn_sb->dataoff); u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t base = nsio->res.start + start_pad; struct vmem_altmap __altmap = { @@ -474,112 +445,75 @@ static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) .reserve = init_altmap_reserve(base), }; - pmem = dev_get_drvdata(dev); - pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); - pmem->pfn_pad = start_pad + end_trunc; + memcpy(res, &nsio->res, sizeof(*res)); + res->start += start_pad; + res->end -= end_trunc; + nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); if (nd_pfn->mode == PFN_MODE_RAM) { - if (pmem->data_offset < SZ_8K) - return -EINVAL; + if (offset < SZ_8K) + return ERR_PTR(-EINVAL); nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); altmap = NULL; } else if (nd_pfn->mode == PFN_MODE_PMEM) { - nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset) - / PAGE_SIZE; + nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE; if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) dev_info(&nd_pfn->dev, "number of pfns truncated from %lld to %ld\n", le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); - altmap = & __altmap; - altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K); + memcpy(altmap, &__altmap, sizeof(*altmap)); + altmap->free = PHYS_PFN(offset - SZ_8K); altmap->alloc = 0; } else - return -ENXIO; + return ERR_PTR(-ENXIO); - /* establish pfn range for lookup, and switch to direct map */ - q = pmem->pmem_queue; - memcpy(&res, &nsio->res, sizeof(res)); - res.start += start_pad; - res.end -= end_trunc; - devm_remove_action(dev, pmem_release_queue, q); - devm_memunmap(dev, (void __force *) pmem->virt_addr); - pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res, - &q->q_usage_counter, altmap); - pmem->pfn_flags |= PFN_MAP; - - /* - * At release time the queue must be dead before - * devm_memremap_pages is unwound - */ - if (devm_add_action(dev, pmem_release_queue, q)) { - blk_cleanup_queue(q); - return -ENOMEM; - } - if (IS_ERR(pmem->virt_addr)) - return PTR_ERR(pmem->virt_addr); - - /* attach pmem disk in "pfn-mode" */ - return pmem_attach_disk(dev, ndns, pmem); + return altmap; } -static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +/* + * Determine the effective resource range and vmem_altmap from an nd_pfn + * instance. + */ +static struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct resource *res, struct vmem_altmap *altmap) { - struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); int rc; if (!nd_pfn->uuid || !nd_pfn->ndns) - return -ENODEV; + return ERR_PTR(-ENODEV); rc = nd_pfn_init(nd_pfn); if (rc) - return rc; + return ERR_PTR(rc); + /* we need a valid pfn_sb before we can init a vmem_altmap */ - return __nvdimm_namespace_attach_pfn(nd_pfn); + return __nvdimm_setup_pfn(nd_pfn, res, altmap); } static int nd_pmem_probe(struct device *dev) { - struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_namespace_common *ndns; - struct nd_namespace_io *nsio; - struct pmem_device *pmem; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) return PTR_ERR(ndns); - nsio = to_nd_namespace_io(&ndns->dev); - pmem = pmem_alloc(dev, &nsio->res, nd_region->id); - if (IS_ERR(pmem)) - return PTR_ERR(pmem); - - dev_set_drvdata(dev, pmem); - ndns->rw_bytes = pmem_rw_bytes; - if (devm_init_badblocks(dev, &pmem->bb)) - return -ENOMEM; - nvdimm_badblocks_populate(nd_region, &pmem->bb, &nsio->res); + if (devm_nsio_enable(dev, to_nd_namespace_io(&ndns->dev))) + return -ENXIO; - if (is_nd_btt(dev)) { - /* btt allocates its own request_queue */ - devm_remove_action(dev, pmem_release_queue, pmem->pmem_queue); - blk_cleanup_queue(pmem->pmem_queue); + if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); - } if (is_nd_pfn(dev)) - return nvdimm_namespace_attach_pfn(ndns); + return pmem_attach_disk(dev, ndns); - if (nd_btt_probe(dev, ndns, pmem) == 0 - || nd_pfn_probe(dev, ndns, pmem) == 0) { - /* - * We'll come back as either btt-pmem, or pfn-pmem, so - * drop the queue allocation for now. - */ + /* if we find a valid info-block we'll come back as that personality */ + if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0) return -ENXIO; - } - return pmem_attach_disk(dev, ndns, pmem); + /* ...otherwise we're just a raw pmem device */ + return pmem_attach_disk(dev, ndns); } static int nd_pmem_remove(struct device *dev) diff --git a/include/linux/nd.h b/include/linux/nd.h index 5ea4aec7fd63..aee2761d294c 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -15,6 +15,7 @@ #include #include #include +#include enum nvdimm_event { NVDIMM_REVALIDATE_POISON, @@ -55,13 +56,19 @@ static inline struct nd_namespace_common *to_ndns(struct device *dev) } /** - * struct nd_namespace_io - infrastructure for loading an nd_pmem instance + * struct nd_namespace_io - device representation of a persistent memory range * @dev: namespace device created by the nd region driver * @res: struct resource conversion of a NFIT SPA table + * @size: cached resource_size(@res) for fast path size checks + * @addr: virtual address to access the namespace range + * @bb: badblocks list for the namespace range */ struct nd_namespace_io { struct nd_namespace_common common; struct resource res; + resource_size_t size; + void __pmem *addr; + struct badblocks bb; }; /** diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index a34bfd0c8928..d5bc8c080b44 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -7,6 +7,7 @@ ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap ldflags-y += --wrap=memunmap ldflags-y += --wrap=__devm_request_region +ldflags-y += --wrap=__devm_release_region ldflags-y += --wrap=__request_region ldflags-y += --wrap=__release_region ldflags-y += --wrap=devm_memremap_pages diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 0c1a7e65bb81..c842095f2801 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -239,13 +239,11 @@ struct resource *__wrap___devm_request_region(struct device *dev, } EXPORT_SYMBOL(__wrap___devm_request_region); -void __wrap___release_region(struct resource *parent, resource_size_t start, - resource_size_t n) +static bool nfit_test_release_region(struct resource *parent, + resource_size_t start, resource_size_t n) { - struct nfit_test_resource *nfit_res; - if (parent == &iomem_resource) { - nfit_res = get_nfit_res(start); + struct nfit_test_resource *nfit_res = get_nfit_res(start); if (nfit_res) { struct resource *res = nfit_res->res + 1; @@ -254,11 +252,26 @@ void __wrap___release_region(struct resource *parent, resource_size_t start, __func__, start, n, res); else memset(res, 0, sizeof(*res)); - return; + return true; } } - __release_region(parent, start, n); + return false; +} + +void __wrap___release_region(struct resource *parent, resource_size_t start, + resource_size_t n) +{ + if (!nfit_test_release_region(parent, start, n)) + __release_region(parent, start, n); } EXPORT_SYMBOL(__wrap___release_region); +void __wrap___devm_release_region(struct device *dev, struct resource *parent, + resource_size_t start, resource_size_t n) +{ + if (!nfit_test_release_region(parent, start, n)) + __devm_release_region(dev, parent, start, n); +} +EXPORT_SYMBOL(__wrap___devm_release_region); + MODULE_LICENSE("GPL v2"); -- cgit From 3391ba0e2792411dc3372b76a4662971d6eaa405 Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Tue, 8 Mar 2016 21:49:04 +0100 Subject: usbip: tools: Extract generic code to be shared with vudc backend Extract the code from current stub driver backend and a common interface for both stub driver and vudc. This allows to share most of the usbipd code for both of them. Based on code created in cooperation with Open Operating Systems Student Society at University of Warsaw (O2S3@UW) consisting of: Igor Kotrasinski Karol Kosik Ewelina Kosmider <3w3lfin@gmail.com> Dawid Lazarczyk Piotr Szulc Tutor and project owner: Krzysztof Opasiak Signed-off-by: Krzysztof Opasiak Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/libsrc/Makefile.am | 3 +- tools/usb/usbip/libsrc/usbip_host_common.c | 273 +++++++++++++++++++++++++++++ tools/usb/usbip/libsrc/usbip_host_common.h | 104 +++++++++++ tools/usb/usbip/libsrc/usbip_host_driver.c | 269 +++------------------------- tools/usb/usbip/libsrc/usbip_host_driver.h | 27 +-- tools/usb/usbip/src/usbipd.c | 30 ++-- 6 files changed, 428 insertions(+), 278 deletions(-) create mode 100644 tools/usb/usbip/libsrc/usbip_host_common.c create mode 100644 tools/usb/usbip/libsrc/usbip_host_common.h (limited to 'tools') diff --git a/tools/usb/usbip/libsrc/Makefile.am b/tools/usb/usbip/libsrc/Makefile.am index 7c8f8a4d54e4..eb62f99cc0ee 100644 --- a/tools/usb/usbip/libsrc/Makefile.am +++ b/tools/usb/usbip/libsrc/Makefile.am @@ -4,5 +4,6 @@ libusbip_la_LDFLAGS = -version-info @LIBUSBIP_VERSION@ lib_LTLIBRARIES := libusbip.la libusbip_la_SOURCES := names.c names.h usbip_host_driver.c usbip_host_driver.h \ - usbip_common.c usbip_common.h vhci_driver.c vhci_driver.h \ + usbip_common.c usbip_common.h usbip_host_common.h \ + usbip_host_common.c vhci_driver.c vhci_driver.h \ sysfs_utils.c sysfs_utils.h diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c new file mode 100644 index 000000000000..9d415228883d --- /dev/null +++ b/tools/usb/usbip/libsrc/usbip_host_common.c @@ -0,0 +1,273 @@ +/* + * Copyright (C) 2015-2016 Samsung Electronics + * Igor Kotrasinski + * Krzysztof Opasiak + * + * Refactored from usbip_host_driver.c, which is: + * Copyright (C) 2011 matt mooney + * 2005-2007 Takahiro Hirofuchi + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include +#include + +#include + +#include "usbip_common.h" +#include "usbip_host_common.h" +#include "list.h" +#include "sysfs_utils.h" + +struct udev *udev_context; + +static int32_t read_attr_usbip_status(struct usbip_usb_device *udev) +{ + char status_attr_path[SYSFS_PATH_MAX]; + int fd; + int length; + char status; + int value = 0; + + snprintf(status_attr_path, SYSFS_PATH_MAX, "%s/usbip_status", + udev->path); + + fd = open(status_attr_path, O_RDONLY); + if (fd < 0) { + err("error opening attribute %s", status_attr_path); + return -1; + } + + length = read(fd, &status, 1); + if (length < 0) { + err("error reading attribute %s", status_attr_path); + close(fd); + return -1; + } + + value = atoi(&status); + + return value; +} + +static +struct usbip_exported_device *usbip_exported_device_new( + struct usbip_host_driver *hdriver, const char *sdevpath) +{ + struct usbip_exported_device *edev = NULL; + struct usbip_exported_device *edev_old; + size_t size; + int i; + + edev = calloc(1, sizeof(struct usbip_exported_device)); + + edev->sudev = + udev_device_new_from_syspath(udev_context, sdevpath); + if (!edev->sudev) { + err("udev_device_new_from_syspath: %s", sdevpath); + goto err; + } + + if (hdriver->ops.read_device(edev->sudev, &edev->udev) < 0) + goto err; + + edev->status = read_attr_usbip_status(&edev->udev); + if (edev->status < 0) + goto err; + + /* reallocate buffer to include usb interface data */ + size = sizeof(struct usbip_exported_device) + + edev->udev.bNumInterfaces * sizeof(struct usbip_usb_interface); + + edev_old = edev; + edev = realloc(edev, size); + if (!edev) { + edev = edev_old; + dbg("realloc failed"); + goto err; + } + + for (i = 0; i < edev->udev.bNumInterfaces; i++) { + /* vudc does not support reading interfaces */ + if (!hdriver->ops.read_interface) + break; + hdriver->ops.read_interface(&edev->udev, i, &edev->uinf[i]); + } + + return edev; +err: + if (edev->sudev) + udev_device_unref(edev->sudev); + if (edev) + free(edev); + + return NULL; +} + +static int refresh_exported_devices(struct usbip_host_driver *hdriver) +{ + struct usbip_exported_device *edev; + struct udev_enumerate *enumerate; + struct udev_list_entry *devices, *dev_list_entry; + struct udev_device *dev; + const char *path; + + enumerate = udev_enumerate_new(udev_context); + udev_enumerate_add_match_subsystem(enumerate, hdriver->udev_subsystem); + udev_enumerate_scan_devices(enumerate); + + devices = udev_enumerate_get_list_entry(enumerate); + + udev_list_entry_foreach(dev_list_entry, devices) { + path = udev_list_entry_get_name(dev_list_entry); + dev = udev_device_new_from_syspath(udev_context, + path); + if (dev == NULL) + continue; + + /* Check whether device uses usbip driver. */ + if (hdriver->ops.is_my_device(dev)) { + edev = usbip_exported_device_new(hdriver, path); + if (!edev) { + dbg("usbip_exported_device_new failed"); + continue; + } + + list_add(&edev->node, &hdriver->edev_list); + hdriver->ndevs++; + } + } + + return 0; +} + +static void usbip_exported_device_destroy(struct list_head *devs) +{ + struct list_head *i, *tmp; + struct usbip_exported_device *edev; + + list_for_each_safe(i, tmp, devs) { + edev = list_entry(i, struct usbip_exported_device, node); + list_del(i); + free(edev); + } +} + +int usbip_generic_driver_open(struct usbip_host_driver *hdriver) +{ + int rc; + + udev_context = udev_new(); + if (!udev_context) { + err("udev_new failed"); + return -1; + } + + rc = refresh_exported_devices(hdriver); + if (rc < 0) + goto err; + return 0; +err: + udev_unref(udev_context); + return -1; +} + +void usbip_generic_driver_close(struct usbip_host_driver *hdriver) +{ + if (!hdriver) + return; + + usbip_exported_device_destroy(&hdriver->edev_list); + + udev_unref(udev_context); +} + +int usbip_generic_refresh_device_list(struct usbip_host_driver *hdriver) +{ + int rc; + + usbip_exported_device_destroy(&hdriver->edev_list); + + hdriver->ndevs = 0; + INIT_LIST_HEAD(&hdriver->edev_list); + + rc = refresh_exported_devices(hdriver); + if (rc < 0) + return -1; + + return 0; +} + +int usbip_export_device(struct usbip_exported_device *edev, int sockfd) +{ + char attr_name[] = "usbip_sockfd"; + char sockfd_attr_path[SYSFS_PATH_MAX]; + char sockfd_buff[30]; + int ret; + + if (edev->status != SDEV_ST_AVAILABLE) { + dbg("device not available: %s", edev->udev.busid); + switch (edev->status) { + case SDEV_ST_ERROR: + dbg("status SDEV_ST_ERROR"); + break; + case SDEV_ST_USED: + dbg("status SDEV_ST_USED"); + break; + default: + dbg("status unknown: 0x%x", edev->status); + } + return -1; + } + + /* only the first interface is true */ + snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s", + edev->udev.path, attr_name); + + snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd); + + ret = write_sysfs_attribute(sockfd_attr_path, sockfd_buff, + strlen(sockfd_buff)); + if (ret < 0) { + err("write_sysfs_attribute failed: sockfd %s to %s", + sockfd_buff, sockfd_attr_path); + return ret; + } + + info("connect: %s", edev->udev.busid); + + return ret; +} + +struct usbip_exported_device *usbip_generic_get_device( + struct usbip_host_driver *hdriver, int num) +{ + struct list_head *i; + struct usbip_exported_device *edev; + int cnt = 0; + + list_for_each(i, &hdriver->edev_list) { + edev = list_entry(i, struct usbip_exported_device, node); + if (num == cnt) + return edev; + cnt++; + } + + return NULL; +} diff --git a/tools/usb/usbip/libsrc/usbip_host_common.h b/tools/usb/usbip/libsrc/usbip_host_common.h new file mode 100644 index 000000000000..a64b8033fe64 --- /dev/null +++ b/tools/usb/usbip/libsrc/usbip_host_common.h @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2015-2016 Samsung Electronics + * Igor Kotrasinski + * Krzysztof Opasiak + * + * Refactored from usbip_host_driver.c, which is: + * Copyright (C) 2011 matt mooney + * 2005-2007 Takahiro Hirofuchi + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __USBIP_HOST_COMMON_H +#define __USBIP_HOST_COMMON_H + +#include +#include +#include +#include "list.h" +#include "usbip_common.h" +#include "sysfs_utils.h" + +struct usbip_host_driver; + +struct usbip_host_driver_ops { + int (*open)(struct usbip_host_driver *hdriver); + void (*close)(struct usbip_host_driver *hdriver); + int (*refresh_device_list)(struct usbip_host_driver *hdriver); + struct usbip_exported_device * (*get_device)( + struct usbip_host_driver *hdriver, int num); + + int (*read_device)(struct udev_device *sdev, + struct usbip_usb_device *dev); + int (*read_interface)(struct usbip_usb_device *udev, int i, + struct usbip_usb_interface *uinf); + int (*is_my_device)(struct udev_device *udev); +}; + +struct usbip_host_driver { + int ndevs; + /* list of exported device */ + struct list_head edev_list; + const char *udev_subsystem; + struct usbip_host_driver_ops ops; +}; + +struct usbip_exported_device { + struct udev_device *sudev; + int32_t status; + struct usbip_usb_device udev; + struct list_head node; + struct usbip_usb_interface uinf[]; +}; + +/* External API to access the driver */ +static inline int usbip_driver_open(struct usbip_host_driver *hdriver) +{ + if (!hdriver->ops.open) + return -EOPNOTSUPP; + return hdriver->ops.open(hdriver); +} + +static inline void usbip_driver_close(struct usbip_host_driver *hdriver) +{ + if (!hdriver->ops.close) + return; + hdriver->ops.close(hdriver); +} + +static inline int usbip_refresh_device_list(struct usbip_host_driver *hdriver) +{ + if (!hdriver->ops.refresh_device_list) + return -EOPNOTSUPP; + return hdriver->ops.refresh_device_list(hdriver); +} + +static inline struct usbip_exported_device * +usbip_get_device(struct usbip_host_driver *hdriver, int num) +{ + if (!hdriver->ops.get_device) + return NULL; + return hdriver->ops.get_device(hdriver, num); +} + +/* Helper functions for implementing driver backend */ +int usbip_generic_driver_open(struct usbip_host_driver *hdriver); +void usbip_generic_driver_close(struct usbip_host_driver *hdriver); +int usbip_generic_refresh_device_list(struct usbip_host_driver *hdriver); +int usbip_export_device(struct usbip_exported_device *edev, int sockfd); +struct usbip_exported_device *usbip_generic_get_device( + struct usbip_host_driver *hdriver, int num); + +#endif /* __USBIP_HOST_COMMON_H */ diff --git a/tools/usb/usbip/libsrc/usbip_host_driver.c b/tools/usb/usbip/libsrc/usbip_host_driver.c index bef08d5c44e8..4de6edc54d35 100644 --- a/tools/usb/usbip/libsrc/usbip_host_driver.c +++ b/tools/usb/usbip/libsrc/usbip_host_driver.c @@ -1,6 +1,9 @@ /* * Copyright (C) 2011 matt mooney * 2005-2007 Takahiro Hirofuchi + * Copyright (C) 2015-2016 Samsung Electronics + * Igor Kotrasinski + * Krzysztof Opasiak * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,265 +19,47 @@ * along with this program. If not, see . */ -#include -#include -#include - -#include #include - #include -#include "usbip_common.h" +#include "usbip_host_common.h" #include "usbip_host_driver.h" -#include "list.h" -#include "sysfs_utils.h" #undef PROGNAME #define PROGNAME "libusbip" -struct usbip_host_driver *host_driver; -struct udev *udev_context; - -static int32_t read_attr_usbip_status(struct usbip_usb_device *udev) -{ - char status_attr_path[SYSFS_PATH_MAX]; - int fd; - int length; - char status; - int value = 0; - - snprintf(status_attr_path, SYSFS_PATH_MAX, "%s/usbip_status", - udev->path); - - fd = open(status_attr_path, O_RDONLY); - if (fd < 0) { - err("error opening attribute %s", status_attr_path); - return -1; - } - - length = read(fd, &status, 1); - if (length < 0) { - err("error reading attribute %s", status_attr_path); - close(fd); - return -1; - } - - value = atoi(&status); - - return value; -} - -static -struct usbip_exported_device *usbip_exported_device_new(const char *sdevpath) -{ - struct usbip_exported_device *edev = NULL; - struct usbip_exported_device *edev_old; - size_t size; - int i; - - edev = calloc(1, sizeof(struct usbip_exported_device)); - - edev->sudev = udev_device_new_from_syspath(udev_context, sdevpath); - if (!edev->sudev) { - err("udev_device_new_from_syspath: %s", sdevpath); - goto err; - } - - read_usb_device(edev->sudev, &edev->udev); - - edev->status = read_attr_usbip_status(&edev->udev); - if (edev->status < 0) - goto err; - - /* reallocate buffer to include usb interface data */ - size = sizeof(struct usbip_exported_device) + - edev->udev.bNumInterfaces * sizeof(struct usbip_usb_interface); - - edev_old = edev; - edev = realloc(edev, size); - if (!edev) { - edev = edev_old; - dbg("realloc failed"); - goto err; - } - - for (i = 0; i < edev->udev.bNumInterfaces; i++) - read_usb_interface(&edev->udev, i, &edev->uinf[i]); - - return edev; -err: - if (edev->sudev) - udev_device_unref(edev->sudev); - if (edev) - free(edev); - - return NULL; -} - -static int refresh_exported_devices(void) +static int is_my_device(struct udev_device *dev) { - struct usbip_exported_device *edev; - struct udev_enumerate *enumerate; - struct udev_list_entry *devices, *dev_list_entry; - struct udev_device *dev; - const char *path; const char *driver; - enumerate = udev_enumerate_new(udev_context); - udev_enumerate_add_match_subsystem(enumerate, "usb"); - udev_enumerate_scan_devices(enumerate); - - devices = udev_enumerate_get_list_entry(enumerate); - - udev_list_entry_foreach(dev_list_entry, devices) { - path = udev_list_entry_get_name(dev_list_entry); - dev = udev_device_new_from_syspath(udev_context, path); - if (dev == NULL) - continue; - - /* Check whether device uses usbip-host driver. */ - driver = udev_device_get_driver(dev); - if (driver != NULL && !strcmp(driver, USBIP_HOST_DRV_NAME)) { - edev = usbip_exported_device_new(path); - if (!edev) { - dbg("usbip_exported_device_new failed"); - continue; - } - - list_add(&edev->node, &host_driver->edev_list); - host_driver->ndevs++; - } - } - - return 0; -} - -static void usbip_exported_device_destroy(void) -{ - struct list_head *i, *tmp; - struct usbip_exported_device *edev; - - list_for_each_safe(i, tmp, &host_driver->edev_list) { - edev = list_entry(i, struct usbip_exported_device, node); - list_del(i); - free(edev); - } + driver = udev_device_get_driver(dev); + return driver != NULL && !strcmp(driver, USBIP_HOST_DRV_NAME); } -int usbip_host_driver_open(void) +static int usbip_host_driver_open(struct usbip_host_driver *hdriver) { - int rc; - - udev_context = udev_new(); - if (!udev_context) { - err("udev_new failed"); - return -1; - } - - host_driver = calloc(1, sizeof(*host_driver)); - - host_driver->ndevs = 0; - INIT_LIST_HEAD(&host_driver->edev_list); - - rc = refresh_exported_devices(); - if (rc < 0) - goto err_free_host_driver; - - return 0; - -err_free_host_driver: - free(host_driver); - host_driver = NULL; - - udev_unref(udev_context); - - return -1; -} - -void usbip_host_driver_close(void) -{ - if (!host_driver) - return; - - usbip_exported_device_destroy(); - - free(host_driver); - host_driver = NULL; - - udev_unref(udev_context); -} - -int usbip_host_refresh_device_list(void) -{ - int rc; - - usbip_exported_device_destroy(); - - host_driver->ndevs = 0; - INIT_LIST_HEAD(&host_driver->edev_list); - - rc = refresh_exported_devices(); - if (rc < 0) - return -1; - - return 0; -} - -int usbip_host_export_device(struct usbip_exported_device *edev, int sockfd) -{ - char attr_name[] = "usbip_sockfd"; - char sockfd_attr_path[SYSFS_PATH_MAX]; - char sockfd_buff[30]; int ret; - if (edev->status != SDEV_ST_AVAILABLE) { - dbg("device not available: %s", edev->udev.busid); - switch (edev->status) { - case SDEV_ST_ERROR: - dbg("status SDEV_ST_ERROR"); - break; - case SDEV_ST_USED: - dbg("status SDEV_ST_USED"); - break; - default: - dbg("status unknown: 0x%x", edev->status); - } - return -1; - } - - /* only the first interface is true */ - snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s", - edev->udev.path, attr_name); - - snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd); - - ret = write_sysfs_attribute(sockfd_attr_path, sockfd_buff, - strlen(sockfd_buff)); - if (ret < 0) { - err("write_sysfs_attribute failed: sockfd %s to %s", - sockfd_buff, sockfd_attr_path); - return ret; - } - - info("connect: %s", edev->udev.busid); + hdriver->ndevs = 0; + INIT_LIST_HEAD(&hdriver->edev_list); + ret = usbip_generic_driver_open(hdriver); + if (ret) + err("please load " USBIP_CORE_MOD_NAME ".ko and " + USBIP_HOST_DRV_NAME ".ko!"); return ret; } -struct usbip_exported_device *usbip_host_get_device(int num) -{ - struct list_head *i; - struct usbip_exported_device *edev; - int cnt = 0; - - list_for_each(i, &host_driver->edev_list) { - edev = list_entry(i, struct usbip_exported_device, node); - if (num == cnt) - return edev; - else - cnt++; - } - - return NULL; -} +struct usbip_host_driver host_driver = { + .edev_list = LIST_HEAD_INIT(host_driver.edev_list), + .udev_subsystem = "usb", + .ops = { + .open = usbip_host_driver_open, + .close = usbip_generic_driver_close, + .refresh_device_list = usbip_generic_refresh_device_list, + .get_device = usbip_generic_get_device, + .read_device = read_usb_device, + .read_interface = read_usb_interface, + .is_my_device = is_my_device, + }, +}; diff --git a/tools/usb/usbip/libsrc/usbip_host_driver.h b/tools/usb/usbip/libsrc/usbip_host_driver.h index 2a31f855c616..77f07e72a7fe 100644 --- a/tools/usb/usbip/libsrc/usbip_host_driver.h +++ b/tools/usb/usbip/libsrc/usbip_host_driver.h @@ -1,6 +1,9 @@ /* * Copyright (C) 2011 matt mooney * 2005-2007 Takahiro Hirofuchi + * Copyright (C) 2015-2016 Samsung Electronics + * Igor Kotrasinski + * Krzysztof Opasiak * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -22,28 +25,8 @@ #include #include "usbip_common.h" #include "list.h" +#include "usbip_host_common.h" -struct usbip_host_driver { - int ndevs; - /* list of exported device */ - struct list_head edev_list; -}; - -struct usbip_exported_device { - struct udev_device *sudev; - int32_t status; - struct usbip_usb_device udev; - struct list_head node; - struct usbip_usb_interface uinf[]; -}; - -extern struct usbip_host_driver *host_driver; - -int usbip_host_driver_open(void); -void usbip_host_driver_close(void); - -int usbip_host_refresh_device_list(void); -int usbip_host_export_device(struct usbip_exported_device *edev, int sockfd); -struct usbip_exported_device *usbip_host_get_device(int num); +extern struct usbip_host_driver host_driver; #endif /* __USBIP_HOST_DRIVER_H */ diff --git a/tools/usb/usbip/src/usbipd.c b/tools/usb/usbip/src/usbipd.c index 2a7cd2b8d966..8a2ec4dab4bd 100644 --- a/tools/usb/usbip/src/usbipd.c +++ b/tools/usb/usbip/src/usbipd.c @@ -1,6 +1,9 @@ /* * Copyright (C) 2011 matt mooney * 2005-2007 Takahiro Hirofuchi + * Copyright (C) 2015-2016 Samsung Electronics + * Igor Kotrasinski + * Krzysztof Opasiak * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -41,6 +44,7 @@ #include #include "usbip_host_driver.h" +#include "usbip_host_common.h" #include "usbip_common.h" #include "usbip_network.h" #include "list.h" @@ -83,6 +87,8 @@ static const char usbipd_help_string[] = " -v, --version\n" " Show version.\n"; +static struct usbip_host_driver *driver; + static void usbipd_help(void) { printf("%s\n", usbipd_help_string); @@ -107,7 +113,7 @@ static int recv_request_import(int sockfd) } PACK_OP_IMPORT_REQUEST(0, &req); - list_for_each(i, &host_driver->edev_list) { + list_for_each(i, &driver->edev_list) { edev = list_entry(i, struct usbip_exported_device, node); if (!strncmp(req.busid, edev->udev.busid, SYSFS_BUS_ID_SIZE)) { info("found requested device: %s", req.busid); @@ -121,7 +127,7 @@ static int recv_request_import(int sockfd) usbip_net_set_nodelay(sockfd); /* export device needs a TCP/IP socket descriptor */ - rc = usbip_host_export_device(edev, sockfd); + rc = usbip_export_device(edev, sockfd); if (rc < 0) error = 1; } else { @@ -166,7 +172,7 @@ static int send_reply_devlist(int connfd) reply.ndev = 0; /* number of exported devices */ - list_for_each(j, &host_driver->edev_list) { + list_for_each(j, &driver->edev_list) { reply.ndev += 1; } info("exportable devices: %d", reply.ndev); @@ -184,7 +190,7 @@ static int send_reply_devlist(int connfd) return -1; } - list_for_each(j, &host_driver->edev_list) { + list_for_each(j, &driver->edev_list) { edev = list_entry(j, struct usbip_exported_device, node); dump_usb_device(&edev->udev); memcpy(&pdu_udev, &edev->udev, sizeof(pdu_udev)); @@ -246,7 +252,7 @@ static int recv_pdu(int connfd) return -1; } - ret = usbip_host_refresh_device_list(); + ret = usbip_refresh_device_list(driver); if (ret < 0) { dbg("could not refresh device list: %d", ret); return -1; @@ -491,16 +497,13 @@ static int do_standalone_mode(int daemonize, int ipv4, int ipv6) struct timespec timeout; sigset_t sigmask; - if (usbip_host_driver_open()) { - err("please load " USBIP_CORE_MOD_NAME ".ko and " - USBIP_HOST_DRV_NAME ".ko!"); + if (usbip_driver_open(driver)) return -1; - } if (daemonize) { if (daemon(0, 0) < 0) { err("daemonizing failed: %s", strerror(errno)); - usbip_host_driver_close(); + usbip_driver_close(driver); return -1; } umask(0); @@ -525,7 +528,7 @@ static int do_standalone_mode(int daemonize, int ipv4, int ipv6) ai_head = do_getaddrinfo(NULL, family); if (!ai_head) { - usbip_host_driver_close(); + usbip_driver_close(driver); return -1; } nsockfd = listen_all_addrinfo(ai_head, sockfdlist, @@ -533,7 +536,7 @@ static int do_standalone_mode(int daemonize, int ipv4, int ipv6) freeaddrinfo(ai_head); if (nsockfd <= 0) { err("failed to open a listening socket"); - usbip_host_driver_close(); + usbip_driver_close(driver); return -1; } @@ -574,7 +577,7 @@ static int do_standalone_mode(int daemonize, int ipv4, int ipv6) info("shutting down " PROGNAME); free(fds); - usbip_host_driver_close(); + usbip_driver_close(driver); return 0; } @@ -613,6 +616,7 @@ int main(int argc, char *argv[]) err("not running as root?"); cmd = cmd_standalone_mode; + driver = &host_driver; for (;;) { opt = getopt_long(argc, argv, "46DdP::t:hv", longopts, NULL); -- cgit From 7b3f74f7e0601b2767aee7e188b1e3a912c082a2 Mon Sep 17 00:00:00 2001 From: Krzysztof Opasiak Date: Tue, 8 Mar 2016 21:49:05 +0100 Subject: usbip: tools: Add vudc backend to usbip tools Adds an equivalent of usbip_host_driver for the vudc. Most of the code is already shared, but this adds some vudc specific code for getting information about devices. Based on code created in cooperation with Open Operating Systems Student Society at University of Warsaw (O2S3@UW) consisting of: Igor Kotrasinski Karol Kosik Ewelina Kosmider <3w3lfin@gmail.com> Dawid Lazarczyk Piotr Szulc Tutor and project owner: Krzysztof Opasiak Signed-off-by: Krzysztof Opasiak Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/libsrc/Makefile.am | 1 + tools/usb/usbip/libsrc/usbip_common.h | 3 + tools/usb/usbip/libsrc/usbip_device_driver.c | 163 +++++++++++++++++++++++++++ tools/usb/usbip/libsrc/usbip_device_driver.h | 34 ++++++ 4 files changed, 201 insertions(+) create mode 100644 tools/usb/usbip/libsrc/usbip_device_driver.c create mode 100644 tools/usb/usbip/libsrc/usbip_device_driver.h (limited to 'tools') diff --git a/tools/usb/usbip/libsrc/Makefile.am b/tools/usb/usbip/libsrc/Makefile.am index eb62f99cc0ee..90daf95c0804 100644 --- a/tools/usb/usbip/libsrc/Makefile.am +++ b/tools/usb/usbip/libsrc/Makefile.am @@ -4,6 +4,7 @@ libusbip_la_LDFLAGS = -version-info @LIBUSBIP_VERSION@ lib_LTLIBRARIES := libusbip.la libusbip_la_SOURCES := names.c names.h usbip_host_driver.c usbip_host_driver.h \ + usbip_device_driver.c usbip_device_driver.h \ usbip_common.c usbip_common.h usbip_host_common.h \ usbip_host_common.c vhci_driver.c vhci_driver.h \ sysfs_utils.c sysfs_utils.h diff --git a/tools/usb/usbip/libsrc/usbip_common.h b/tools/usb/usbip/libsrc/usbip_common.h index 15fe792e1e96..51ef5fe485dd 100644 --- a/tools/usb/usbip/libsrc/usbip_common.h +++ b/tools/usb/usbip/libsrc/usbip_common.h @@ -25,9 +25,12 @@ #define VHCI_STATE_PATH "/var/run/vhci_hcd" #endif +#define VUDC_DEVICE_DESCR_FILE "dev_desc" + /* kernel module names */ #define USBIP_CORE_MOD_NAME "usbip-core" #define USBIP_HOST_DRV_NAME "usbip-host" +#define USBIP_DEVICE_DRV_NAME "usbip-vudc" #define USBIP_VHCI_DRV_NAME "vhci_hcd" /* sysfs constants */ diff --git a/tools/usb/usbip/libsrc/usbip_device_driver.c b/tools/usb/usbip/libsrc/usbip_device_driver.c new file mode 100644 index 000000000000..e059b7d1ec5b --- /dev/null +++ b/tools/usb/usbip/libsrc/usbip_device_driver.c @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2015 Karol Kosik + * 2015 Samsung Electronics + * Author: Igor Kotrasinski + * + * Based on tools/usb/usbip/libsrc/usbip_host_driver.c, which is: + * Copyright (C) 2011 matt mooney + * 2005-2007 Takahiro Hirofuchi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include + +#include "usbip_host_common.h" +#include "usbip_device_driver.h" + +#undef PROGNAME +#define PROGNAME "libusbip" + +#define copy_descr_attr16(dev, descr, attr) \ + ((dev)->attr = le16toh((descr)->attr)) \ + +#define copy_descr_attr(dev, descr, attr) \ + ((dev)->attr = (descr)->attr) \ + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +static struct { + enum usb_device_speed speed; + const char *name; +} speed_names[] = { + { + .speed = USB_SPEED_UNKNOWN, + .name = "UNKNOWN", + }, + { + .speed = USB_SPEED_LOW, + .name = "low-speed", + }, + { + .speed = USB_SPEED_FULL, + .name = "full-speed", + }, + { + .speed = USB_SPEED_HIGH, + .name = "high-speed", + }, + { + .speed = USB_SPEED_WIRELESS, + .name = "wireless", + }, + { + .speed = USB_SPEED_SUPER, + .name = "super-speed", + }, +}; + +static +int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev) +{ + const char *path, *name; + char filepath[SYSFS_PATH_MAX]; + struct usb_device_descriptor descr; + unsigned i; + FILE *fd = NULL; + struct udev_device *plat; + const char *speed; + int ret = 0; + + plat = udev_device_get_parent(sdev); + path = udev_device_get_syspath(plat); + snprintf(filepath, SYSFS_PATH_MAX, "%s/%s", + path, VUDC_DEVICE_DESCR_FILE); + fd = fopen(filepath, "r"); + if (!fd) + return -1; + ret = fread((char *) &descr, sizeof(descr), 1, fd); + if (ret < 0) + return -1; + fclose(fd); + + copy_descr_attr(dev, &descr, bDeviceClass); + copy_descr_attr(dev, &descr, bDeviceSubClass); + copy_descr_attr(dev, &descr, bDeviceProtocol); + copy_descr_attr(dev, &descr, bNumConfigurations); + copy_descr_attr16(dev, &descr, idVendor); + copy_descr_attr16(dev, &descr, idProduct); + copy_descr_attr16(dev, &descr, bcdDevice); + + strncpy(dev->path, path, SYSFS_PATH_MAX); + + dev->speed = USB_SPEED_UNKNOWN; + speed = udev_device_get_sysattr_value(sdev, "current_speed"); + if (speed) { + for (i = 0; i < ARRAY_SIZE(speed_names); i++) { + if (!strcmp(speed_names[i].name, speed)) { + dev->speed = speed_names[i].speed; + break; + } + } + } + + /* Only used for user output, little sense to output them in general */ + dev->bNumInterfaces = 0; + dev->bConfigurationValue = 0; + dev->busnum = 0; + + name = udev_device_get_sysname(plat); + strncpy(dev->busid, name, SYSFS_BUS_ID_SIZE); + return 0; +} + +static int is_my_device(struct udev_device *dev) +{ + const char *driver; + + driver = udev_device_get_property_value(dev, "USB_UDC_NAME"); + return driver != NULL && !strcmp(driver, USBIP_DEVICE_DRV_NAME); +} + +static int usbip_device_driver_open(struct usbip_host_driver *hdriver) +{ + int ret; + + hdriver->ndevs = 0; + INIT_LIST_HEAD(&hdriver->edev_list); + + ret = usbip_generic_driver_open(hdriver); + if (ret) + err("please load " USBIP_CORE_MOD_NAME ".ko and " + USBIP_DEVICE_DRV_NAME ".ko!"); + + return ret; +} + +struct usbip_host_driver device_driver = { + .edev_list = LIST_HEAD_INIT(device_driver.edev_list), + .udev_subsystem = "udc", + .ops = { + .open = usbip_device_driver_open, + .close = usbip_generic_driver_close, + .refresh_device_list = usbip_generic_refresh_device_list, + .get_device = usbip_generic_get_device, + .read_device = read_usb_vudc_device, + .is_my_device = is_my_device, + }, +}; diff --git a/tools/usb/usbip/libsrc/usbip_device_driver.h b/tools/usb/usbip/libsrc/usbip_device_driver.h new file mode 100644 index 000000000000..54cb658b37a3 --- /dev/null +++ b/tools/usb/usbip/libsrc/usbip_device_driver.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2015 Karol Kosik + * 2015 Samsung Electronics + * Author: Igor Kotrasinski + * + * Based on tools/usb/usbip/libsrc/usbip_host_driver.c, which is: + * Copyright (C) 2011 matt mooney + * 2005-2007 Takahiro Hirofuchi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __USBIP_DEVICE_DRIVER_H +#define __USBIP_DEVICE_DRIVER_H + +#include +#include "usbip_common.h" +#include "usbip_host_common.h" +#include "list.h" + +extern struct usbip_host_driver device_driver; + +#endif /* __USBIP_DEVICE_DRIVER_H */ -- cgit From e0546fd8b748b19d8edd1550530da8ebad6e4b31 Mon Sep 17 00:00:00 2001 From: Igor Kotrasinski Date: Tue, 8 Mar 2016 21:49:06 +0100 Subject: usbip: tools: Start using VUDC backend in usbip tools Modify userspace tools to allow exporting and connecting to vudc. This commit is a result of cooperation between Samsung R&D Institute Poland and Open Operating Systems Student Society at University of Warsaw (O2S3@UW) consisting of: Igor Kotrasinski Karol Kosik Ewelina Kosmider <3w3lfin@gmail.com> Dawid Lazarczyk Piotr Szulc Tutor and project owner: Krzysztof Opasiak Signed-off-by: Igor Kotrasinski Signed-off-by: Ewelina Kosmider <3w3lfin@gmail.com> [Various bug fixes and improvements] Signed-off-by: Krzysztof Opasiak Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip_attach.c | 10 +++- tools/usb/usbip/src/usbip_list.c | 96 ++++++++++++++++++++++++++++++++++++-- tools/usb/usbip/src/usbipd.c | 12 ++++- 3 files changed, 112 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/usb/usbip/src/usbip_attach.c b/tools/usb/usbip/src/usbip_attach.c index d58a14dfc094..70a6b507fb62 100644 --- a/tools/usb/usbip/src/usbip_attach.c +++ b/tools/usb/usbip/src/usbip_attach.c @@ -1,6 +1,9 @@ /* * Copyright (C) 2011 matt mooney * 2005-2007 Takahiro Hirofuchi + * Copyright (C) 2015-2016 Samsung Electronics + * Igor Kotrasinski + * Krzysztof Opasiak * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,7 +39,8 @@ static const char usbip_attach_usage_string[] = "usbip attach \n" " -r, --remote= The machine with exported USB devices\n" - " -b, --busid= Busid of the device on \n"; + " -b, --busid= Busid of the device on \n" + " -d, --device= Id of the virtual UDC on \n"; void usbip_attach_usage(void) { @@ -203,6 +207,7 @@ int usbip_attach(int argc, char *argv[]) static const struct option opts[] = { { "remote", required_argument, NULL, 'r' }, { "busid", required_argument, NULL, 'b' }, + { "device", required_argument, NULL, 'd' }, { NULL, 0, NULL, 0 } }; char *host = NULL; @@ -211,7 +216,7 @@ int usbip_attach(int argc, char *argv[]) int ret = -1; for (;;) { - opt = getopt_long(argc, argv, "r:b:", opts, NULL); + opt = getopt_long(argc, argv, "d:r:b:", opts, NULL); if (opt == -1) break; @@ -220,6 +225,7 @@ int usbip_attach(int argc, char *argv[]) case 'r': host = optarg; break; + case 'd': case 'b': busid = optarg; break; diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c index d5ce34a410e7..f1b38e866dd7 100644 --- a/tools/usb/usbip/src/usbip_list.c +++ b/tools/usb/usbip/src/usbip_list.c @@ -1,6 +1,9 @@ /* * Copyright (C) 2011 matt mooney * 2005-2007 Takahiro Hirofuchi + * Copyright (C) 2015-2016 Samsung Electronics + * Igor Kotrasinski + * Krzysztof Opasiak * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -30,6 +33,10 @@ #include #include +#include + +#include + #include "usbip_common.h" #include "usbip_network.h" #include "usbip.h" @@ -205,8 +212,10 @@ static int list_devices(bool parsable) /* Get device information. */ idVendor = udev_device_get_sysattr_value(dev, "idVendor"); idProduct = udev_device_get_sysattr_value(dev, "idProduct"); - bConfValue = udev_device_get_sysattr_value(dev, "bConfigurationValue"); - bNumIntfs = udev_device_get_sysattr_value(dev, "bNumInterfaces"); + bConfValue = udev_device_get_sysattr_value(dev, + "bConfigurationValue"); + bNumIntfs = udev_device_get_sysattr_value(dev, + "bNumInterfaces"); busid = udev_device_get_sysname(dev); if (!idVendor || !idProduct || !bConfValue || !bNumIntfs) { err("problem getting device attributes: %s", @@ -237,12 +246,90 @@ err_out: return ret; } +static int list_gadget_devices(bool parsable) +{ + int ret = -1; + struct udev *udev; + struct udev_enumerate *enumerate; + struct udev_list_entry *devices, *dev_list_entry; + struct udev_device *dev; + const char *path; + const char *driver; + + const struct usb_device_descriptor *d_desc; + const char *descriptors; + char product_name[128]; + + uint16_t idVendor; + char idVendor_buf[8]; + uint16_t idProduct; + char idProduct_buf[8]; + const char *busid; + + udev = udev_new(); + enumerate = udev_enumerate_new(udev); + + udev_enumerate_add_match_subsystem(enumerate, "platform"); + + udev_enumerate_scan_devices(enumerate); + devices = udev_enumerate_get_list_entry(enumerate); + + udev_list_entry_foreach(dev_list_entry, devices) { + path = udev_list_entry_get_name(dev_list_entry); + dev = udev_device_new_from_syspath(udev, path); + + driver = udev_device_get_driver(dev); + /* We only have mechanism to enumerate gadgets bound to vudc */ + if (driver == NULL || strcmp(driver, USBIP_DEVICE_DRV_NAME)) + continue; + + /* Get device information. */ + descriptors = udev_device_get_sysattr_value(dev, + VUDC_DEVICE_DESCR_FILE); + + if (!descriptors) { + err("problem getting device attributes: %s", + strerror(errno)); + goto err_out; + } + + d_desc = (const struct usb_device_descriptor *) descriptors; + + idVendor = le16toh(d_desc->idVendor); + sprintf(idVendor_buf, "0x%4x", idVendor); + idProduct = le16toh(d_desc->idProduct); + sprintf(idProduct_buf, "0x%4x", idVendor); + busid = udev_device_get_sysname(dev); + + /* Get product name. */ + usbip_names_get_product(product_name, sizeof(product_name), + le16toh(idVendor), + le16toh(idProduct)); + + /* Print information. */ + print_device(busid, idVendor_buf, idProduct_buf, parsable); + print_product_name(product_name, parsable); + + printf("\n"); + + udev_device_unref(dev); + } + ret = 0; + +err_out: + udev_enumerate_unref(enumerate); + udev_unref(udev); + + return ret; +} + int usbip_list(int argc, char *argv[]) { static const struct option opts[] = { { "parsable", no_argument, NULL, 'p' }, { "remote", required_argument, NULL, 'r' }, { "local", no_argument, NULL, 'l' }, + { "device", no_argument, NULL, 'd' }, { NULL, 0, NULL, 0 } }; @@ -254,7 +341,7 @@ int usbip_list(int argc, char *argv[]) err("failed to open %s", USBIDS_FILE); for (;;) { - opt = getopt_long(argc, argv, "pr:l", opts, NULL); + opt = getopt_long(argc, argv, "pr:ld", opts, NULL); if (opt == -1) break; @@ -269,6 +356,9 @@ int usbip_list(int argc, char *argv[]) case 'l': ret = list_devices(parsable); goto out; + case 'd': + ret = list_gadget_devices(parsable); + goto out; default: goto err_out; } diff --git a/tools/usb/usbip/src/usbipd.c b/tools/usb/usbip/src/usbipd.c index 8a2ec4dab4bd..a0972dea9e6c 100644 --- a/tools/usb/usbip/src/usbipd.c +++ b/tools/usb/usbip/src/usbipd.c @@ -45,6 +45,7 @@ #include "usbip_host_driver.h" #include "usbip_host_common.h" +#include "usbip_device_driver.h" #include "usbip_common.h" #include "usbip_network.h" #include "list.h" @@ -68,6 +69,11 @@ static const char usbipd_help_string[] = " -6, --ipv6\n" " Bind to IPv6. Default is both.\n" "\n" + " -e, --device\n" + " Run in device mode.\n" + " Rather than drive an attached device, create\n" + " a virtual UDC to bind gadgets to.\n" + "\n" " -D, --daemon\n" " Run as a daemon process.\n" "\n" @@ -590,6 +596,7 @@ int main(int argc, char *argv[]) { "daemon", no_argument, NULL, 'D' }, { "daemon", no_argument, NULL, 'D' }, { "debug", no_argument, NULL, 'd' }, + { "device", no_argument, NULL, 'e' }, { "pid", optional_argument, NULL, 'P' }, { "tcp-port", required_argument, NULL, 't' }, { "help", no_argument, NULL, 'h' }, @@ -618,7 +625,7 @@ int main(int argc, char *argv[]) cmd = cmd_standalone_mode; driver = &host_driver; for (;;) { - opt = getopt_long(argc, argv, "46DdP::t:hv", longopts, NULL); + opt = getopt_long(argc, argv, "46DdeP::t:hv", longopts, NULL); if (opt == -1) break; @@ -648,6 +655,9 @@ int main(int argc, char *argv[]) case 'v': cmd = cmd_version; break; + case 'e': + driver = &device_driver; + break; case '?': usbipd_help(); default: -- cgit From e3654eca70d63704c94a60a2aafc0b3c7b46a00b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 28 Apr 2016 16:17:07 -0700 Subject: nfit, libnvdimm: clarify "commands" vs "_DSMs" Clarify the distinction between "commands", the ioctls userspace calls to request the kernel take some action on a given dimm device, and "_DSMs", the actual function numbers used in the firmware interface to the DIMM. _DSMs are ACPI specific whereas commands are Linux kernel generic. This is in preparation for breaking the 1:1 implicit relationship between the kernel ioctl number space and the firmware specific function numbers. Cc: Jerry Hoemann Cc: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 21 +++++++++++++-------- drivers/acpi/nfit.h | 4 ++-- drivers/nvdimm/bus.c | 8 ++++---- drivers/nvdimm/core.c | 2 +- drivers/nvdimm/dimm_devs.c | 18 ++++++++++++------ drivers/nvdimm/nd-core.h | 2 +- include/linux/libnvdimm.h | 5 +++-- tools/testing/nvdimm/test/nfit.c | 27 ++++++++++++++------------- 8 files changed, 50 insertions(+), 37 deletions(-) (limited to 'tools') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index d0f35e63640b..1b98e9dc6138 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -175,7 +175,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, union acpi_object in_obj, in_buf, *out_obj; struct device *dev = acpi_desc->dev; const char *cmd_name, *dimm_name; - unsigned long dsm_mask; + unsigned long cmd_mask; acpi_handle handle; const u8 *uuid; u32 offset; @@ -189,7 +189,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, return -ENOTTY; dimm_name = nvdimm_name(nvdimm); cmd_name = nvdimm_cmd_name(cmd); - dsm_mask = nfit_mem->dsm_mask; + cmd_mask = nvdimm_cmd_mask(nvdimm); desc = nd_cmd_dimm_desc(cmd); uuid = to_nfit_uuid(NFIT_DEV_DIMM); handle = adev->handle; @@ -197,7 +197,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct acpi_device *adev = to_acpi_dev(acpi_desc); cmd_name = nvdimm_bus_cmd_name(cmd); - dsm_mask = nd_desc->dsm_mask; + cmd_mask = nd_desc->cmd_mask; desc = nd_cmd_bus_desc(cmd); uuid = to_nfit_uuid(NFIT_DEV_BUS); handle = adev->handle; @@ -207,7 +207,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, if (!desc || (cmd && (desc->out_num + desc->in_num == 0))) return -ENOTTY; - if (!test_bit(cmd, &dsm_mask)) + if (!test_bit(cmd, &cmd_mask)) return -ENOTTY; in_obj.type = ACPI_TYPE_PACKAGE; @@ -926,7 +926,8 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM); int i; - nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en; + /* nfit test assumes 1:1 relationship between commands and dsms */ + nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en; adev = to_acpi_dev(acpi_desc); if (!adev) return 0; @@ -976,9 +977,13 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) if (rc) continue; + /* + * For now there is 1:1 relationship between cmd_mask and + * dsm_mask. + */ nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem, acpi_nfit_dimm_attribute_groups, - flags, &nfit_mem->dsm_mask); + flags, nfit_mem->dsm_mask); if (!nvdimm) return -ENOMEM; @@ -1007,14 +1012,14 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) struct acpi_device *adev; int i; - nd_desc->dsm_mask = acpi_desc->bus_dsm_force_en; + nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; adev = to_acpi_dev(acpi_desc); if (!adev) return; for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) - set_bit(i, &nd_desc->dsm_mask); + set_bit(i, &nd_desc->cmd_mask); } static ssize_t range_index_show(struct device *dev, diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index c75576b2d50e..332ee6f01662 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -132,8 +132,8 @@ struct acpi_nfit_desc { size_t ars_status_size; struct work_struct work; unsigned int cancel:1; - unsigned long dimm_dsm_force_en; - unsigned long bus_dsm_force_en; + unsigned long dimm_cmd_force_en; + unsigned long bus_cmd_force_en; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, void *iobuf, u64 len, int rw); }; diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 19f822d7f652..cb2042a12b76 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -589,24 +589,24 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, void __user *p = (void __user *) arg; struct device *dev = &nvdimm_bus->dev; const char *cmd_name, *dimm_name; - unsigned long dsm_mask; + unsigned long cmd_mask; void *buf; int rc, i; if (nvdimm) { desc = nd_cmd_dimm_desc(cmd); cmd_name = nvdimm_cmd_name(cmd); - dsm_mask = nvdimm->dsm_mask ? *(nvdimm->dsm_mask) : 0; + cmd_mask = nvdimm->cmd_mask; dimm_name = dev_name(&nvdimm->dev); } else { desc = nd_cmd_bus_desc(cmd); cmd_name = nvdimm_bus_cmd_name(cmd); - dsm_mask = nd_desc->dsm_mask; + cmd_mask = nd_desc->cmd_mask; dimm_name = "bus"; } if (!desc || (desc->out_num + desc->in_num == 0) || - !test_bit(cmd, &dsm_mask)) + !test_bit(cmd, &cmd_mask)) return -ENOTTY; /* fail write commands (when read-only) */ diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 182a93fe3712..e8688a13cf4f 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -251,7 +251,7 @@ static ssize_t commands_show(struct device *dev, struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; - for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG) + for_each_set_bit(cmd, &nd_desc->cmd_mask, BITS_PER_LONG) len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd)); len += sprintf(buf + len, "\n"); return len; diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index c56f88217924..79a35a02053c 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -37,9 +37,9 @@ static int __validate_dimm(struct nvdimm_drvdata *ndd) nvdimm = to_nvdimm(ndd->dev); - if (!nvdimm->dsm_mask) + if (!nvdimm->cmd_mask) return -ENXIO; - if (!test_bit(ND_CMD_GET_CONFIG_DATA, nvdimm->dsm_mask)) + if (!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) return -ENXIO; return 0; @@ -263,6 +263,12 @@ const char *nvdimm_name(struct nvdimm *nvdimm) } EXPORT_SYMBOL_GPL(nvdimm_name); +unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm) +{ + return nvdimm->cmd_mask; +} +EXPORT_SYMBOL_GPL(nvdimm_cmd_mask); + void *nvdimm_provider_data(struct nvdimm *nvdimm) { if (nvdimm) @@ -277,10 +283,10 @@ static ssize_t commands_show(struct device *dev, struct nvdimm *nvdimm = to_nvdimm(dev); int cmd, len = 0; - if (!nvdimm->dsm_mask) + if (!nvdimm->cmd_mask) return sprintf(buf, "\n"); - for_each_set_bit(cmd, nvdimm->dsm_mask, BITS_PER_LONG) + for_each_set_bit(cmd, &nvdimm->cmd_mask, BITS_PER_LONG) len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd)); len += sprintf(buf + len, "\n"); return len; @@ -340,7 +346,7 @@ EXPORT_SYMBOL_GPL(nvdimm_attribute_group); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, - unsigned long *dsm_mask) + unsigned long cmd_mask) { struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); struct device *dev; @@ -355,7 +361,7 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, } nvdimm->provider_data = provider_data; nvdimm->flags = flags; - nvdimm->dsm_mask = dsm_mask; + nvdimm->cmd_mask = cmd_mask; atomic_set(&nvdimm->busy, 0); dev = &nvdimm->dev; dev_set_name(dev, "nmem%d", nvdimm->id); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 1d1500f3d8b5..da0d322ed7cb 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -37,7 +37,7 @@ struct nvdimm_bus { struct nvdimm { unsigned long flags; void *provider_data; - unsigned long *dsm_mask; + unsigned long cmd_mask; struct device dev; atomic_t busy; int id; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index af31d1c6fdd7..0c3c30cbbea5 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -68,7 +68,7 @@ struct nd_mapping { struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; - unsigned long dsm_mask; + unsigned long cmd_mask; char *provider_name; ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); @@ -130,10 +130,11 @@ struct nd_region *to_nd_region(struct device *dev); struct nd_blk_region *to_nd_blk_region(struct device *dev); struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); const char *nvdimm_name(struct nvdimm *nvdimm); +unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm); void *nvdimm_provider_data(struct nvdimm *nvdimm); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, - unsigned long *dsm_mask); + unsigned long cmd_mask); const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 3187322eeed7..ed899a411c22 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -344,8 +344,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, if (nvdimm) { struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm); - if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) + if (!nfit_mem || !test_bit(cmd, &cmd_mask)) return -ENOTTY; /* lookup label space for the given dimm */ @@ -374,7 +375,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, } else { struct ars_state *ars_state = &t->ars_state; - if (!nd_desc || !test_bit(cmd, &nd_desc->dsm_mask)) + if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask)) return -ENOTTY; switch (cmd) { @@ -1251,13 +1252,13 @@ static void nfit_test0_setup(struct nfit_test *t) post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE); acpi_desc = &t->acpi_desc; - set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); - set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); - set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); - set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -1315,10 +1316,10 @@ static void nfit_test1_setup(struct nfit_test *t) post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE); acpi_desc = &t->acpi_desc; - set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, -- cgit From 552beb4930ef3889d42a049eb9ba3533b4cbe0f6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 30 Apr 2016 19:21:37 -0700 Subject: tools: hv: lsvmbus: add pci pass-through UUID lsvmbus keeps its own copy of all VMBus UUIDs, add PCIe pass-through device there to not report 'Unknown' for such devices. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- tools/hv/lsvmbus | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus index 162a3784d80e..e8fecd61871f 100644 --- a/tools/hv/lsvmbus +++ b/tools/hv/lsvmbus @@ -35,6 +35,7 @@ vmbus_dev_dict = { '{ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}' : 'Synthetic SCSI Controller', '{2f9bcc4a-0069-4af3-b76b-6fd0be528cda}' : 'Synthetic fiber channel adapter', '{8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501}' : 'Synthetic RDMA adapter', + '{44c4f61d-4444-4400-9d52-802e27ede19f}' : 'PCI Express pass-through', '{276aacf4-ac15-426c-98dd-7521ad3f01fe}' : '[Reserved system device]', '{f8e65716-3cb3-4a06-9a60-1889c5cccab5}' : '[Reserved system device]', '{3375baf4-9e15-4b30-b765-67acb10d607b}' : '[Reserved system device]', -- cgit From e66fa8b08fbd87f375f964f1eaa1f5dfab9dc0c4 Mon Sep 17 00:00:00 2001 From: Nobuo Iwata Date: Tue, 22 Mar 2016 16:31:03 +0900 Subject: usbip: adding names db to port operation Adding names database to port command. BEFORE) 'unknown' for vendor and product string. Imported USB devices ==================== Port 00: at Low Speed(1.5Mbps) unknown vendor : unknown product (03f0:0224) 3-1 -> usbip://10.0.2.15:3240/5-1 -> remote bus/dev 005/002 AFTER) Most vendor string will be converted. Imported USB devices ==================== Port 00: at Low Speed(1.5Mbps) Hewlett-Packard : unknown product (03f0:0224) 3-1 -> usbip://10.0.2.15:3240/5-1 -> remote bus/dev 005/002 Signed-off-by: Nobuo Iwata Signed-off-by: Greg Kroah-Hartman --- tools/usb/usbip/src/usbip_port.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/usb/usbip/src/usbip_port.c b/tools/usb/usbip/src/usbip_port.c index a2e884fd9226..7bd74fb3a9cd 100644 --- a/tools/usb/usbip/src/usbip_port.c +++ b/tools/usb/usbip/src/usbip_port.c @@ -22,10 +22,13 @@ static int list_imported_devices(void) struct usbip_imported_device *idev; int ret; + if (usbip_names_init(USBIDS_FILE)) + err("failed to open %s", USBIDS_FILE); + ret = usbip_vhci_driver_open(); if (ret < 0) { err("open vhci_driver"); - return -1; + goto err_names_free; } printf("Imported USB devices\n"); @@ -35,13 +38,19 @@ static int list_imported_devices(void) idev = &vhci_driver->idev[i]; if (usbip_vhci_imported_device_dump(idev) < 0) - ret = -1; + goto err_driver_close; } usbip_vhci_driver_close(); + usbip_names_free(); return ret; +err_driver_close: + usbip_vhci_driver_close(); +err_names_free: + usbip_names_free(); + return -1; } int usbip_port_show(__attribute__((unused)) int argc, -- cgit From 6634fb06906f52a3a3125e88681a7fa6e353f31d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 27 Apr 2016 16:46:15 -0600 Subject: tools/testing/nvdimm: ND_CMD_CALL support Enable nfit_test to use nd_cmd_pkg marshaling. Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index ed899a411c22..e09a300eb8e0 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -336,6 +336,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); + unsigned int func = cmd; int i, rc = 0, __cmd_rc; if (!cmd_rc) @@ -346,7 +347,21 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm); - if (!nfit_mem || !test_bit(cmd, &cmd_mask)) + if (!nfit_mem) + return -ENOTTY; + + if (cmd == ND_CMD_CALL) { + struct nd_cmd_pkg *call_pkg = buf; + + buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out; + buf = (void *) call_pkg->nd_payload; + func = call_pkg->nd_command; + if (call_pkg->nd_family != nfit_mem->family) + return -ENOTTY; + } + + if (!test_bit(cmd, &cmd_mask) + || !test_bit(func, &nfit_mem->dsm_mask)) return -ENOTTY; /* lookup label space for the given dimm */ @@ -357,7 +372,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, if (i >= ARRAY_SIZE(handle)) return -ENXIO; - switch (cmd) { + switch (func) { case ND_CMD_GET_CONFIG_SIZE: rc = nfit_test_cmd_get_config_size(buf, buf_len); break; @@ -378,7 +393,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask)) return -ENOTTY; - switch (cmd) { + switch (func) { case ND_CMD_ARS_CAP: rc = nfit_test_cmd_ars_cap(buf, buf_len); break; -- cgit From 91e6f1ce8657795cec83a81090c20cbaa8337c68 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 9 May 2016 18:10:00 -0400 Subject: ftracetest: Add instance created, delete, read and enable event test Add a new ftrace test that creates three threads. One that creates and removes an ftrace instance, one that reads the instance, and one that enables and disables events in the instance. This is a stress test for accessing and removing instances at the same time. Signed-off-by: Steven Rostedt --- .../ftrace/test.d/instances/instance-event.tc | 143 +++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/instances/instance-event.tc (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc new file mode 100644 index 000000000000..5f2abd03f16b --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc @@ -0,0 +1,143 @@ +#!/bin/sh +# description: Test creation and deletion of trace instances while setting an event + +if [ ! -d instances ] ; then + echo "no instance directory with this kernel" + exit_unsupported; +fi + +fail() { # mesg + rmdir foo 2>/dev/null + echo $1 + set -e + exit $FAIL +} + +cd instances + +# we don't want to fail on error +set +e + +mkdir x +rmdir x +result=$? + +if [ $result -ne 0 ]; then + echo "instance rmdir not supported" + exit_unsupported +fi + +instance_slam() { + while :; do + mkdir foo 2> /dev/null + rmdir foo 2> /dev/null + done +} + +instance_read() { + while :; do + cat foo/trace 1> /dev/null 2>&1 + done +} + +instance_set() { + while :; do + echo 1 > foo/events/sched/sched_switch + done 2> /dev/null +} + +instance_slam & +p1=$! +echo $p1 + +instance_set & +p2=$! +echo $p2 + +instance_read & +p3=$! +echo $p3 + +sleep 1 + +kill -1 $p3 +kill -1 $p2 +kill -1 $p1 + +echo "Wait for processes to finish" +wait $p1 $p2 $p3 +echo "all processes finished, wait for cleanup" +sleep 1 + +mkdir foo +ls foo > /dev/null +rmdir foo +if [ -d foo ]; then + fail "foo still exists" +fi +exit 0 + + + + +instance_slam() { + while :; do + mkdir x + mkdir y + mkdir z + rmdir x + rmdir y + rmdir z + done 2>/dev/null +} + +instance_slam & +x=`jobs -l` +p1=`echo $x | cut -d' ' -f2` +echo $p1 + +instance_slam & +x=`jobs -l | tail -1` +p2=`echo $x | cut -d' ' -f2` +echo $p2 + +instance_slam & +x=`jobs -l | tail -1` +p3=`echo $x | cut -d' ' -f2` +echo $p3 + +instance_slam & +x=`jobs -l | tail -1` +p4=`echo $x | cut -d' ' -f2` +echo $p4 + +instance_slam & +x=`jobs -l | tail -1` +p5=`echo $x | cut -d' ' -f2` +echo $p5 + +ls -lR >/dev/null +sleep 1 + +kill -1 $p1 +kill -1 $p2 +kill -1 $p3 +kill -1 $p4 +kill -1 $p5 + +echo "Wait for processes to finish" +wait $p1 $p2 $p3 $p4 $p5 +echo "all processes finished, wait for cleanup" + +mkdir x y z +ls x y z +rmdir x y z +for d in x y z; do + if [ -d $d ]; then + fail "instance $d still exists" + fi +done + +set -e + +exit 0 -- cgit From cd03412a51ac4cb3001a8cdfae4560c9602f3387 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 11 Mar 2016 10:15:36 -0800 Subject: libnvdimm, dax: introduce device-dax infrastructure Device DAX is the device-centric analogue of Filesystem DAX (CONFIG_FS_DAX). It allows persistent memory ranges to be allocated and mapped without need of an intervening file system. This initial infrastructure arranges for a libnvdimm pfn-device to be represented as a different device-type so that it can be attached to a driver other than the pmem driver. Signed-off-by: Dan Williams --- drivers/nvdimm/Kconfig | 13 ++++++ drivers/nvdimm/Makefile | 1 + drivers/nvdimm/bus.c | 4 ++ drivers/nvdimm/claim.c | 2 + drivers/nvdimm/dax_devs.c | 99 +++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/namespace_devs.c | 19 +++++++- drivers/nvdimm/nd-core.h | 1 + drivers/nvdimm/nd.h | 25 ++++++++++ drivers/nvdimm/pfn_devs.c | 100 +++++++++++++++++++++++++++------------- drivers/nvdimm/region.c | 2 + drivers/nvdimm/region_devs.c | 29 ++++++++++++ include/uapi/linux/ndctl.h | 2 + tools/testing/nvdimm/Kbuild | 1 + 13 files changed, 264 insertions(+), 34 deletions(-) create mode 100644 drivers/nvdimm/dax_devs.c (limited to 'tools') diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 53c11621d5b1..7c8a3bf07884 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -88,4 +88,17 @@ config NVDIMM_PFN Select Y if unsure +config NVDIMM_DAX + bool "NVDIMM DAX: Raw access to persistent memory" + default LIBNVDIMM + depends on NVDIMM_PFN + help + Support raw device dax access to a persistent memory + namespace. For environments that want to hard partition + peristent memory, this capability provides a mechanism to + sub-divide a namespace into character devices that can only be + accessed via DAX (mmap(2)). + + Select Y if unsure + endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index ea84d3c4e8e5..909554c3f955 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -23,3 +23,4 @@ libnvdimm-y += label.o libnvdimm-$(CONFIG_ND_CLAIM) += claim.o libnvdimm-$(CONFIG_BTT) += btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o +libnvdimm-$(CONFIG_NVDIMM_DAX) += dax_devs.o diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 19f822d7f652..97589e3cb852 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -40,6 +40,8 @@ static int to_nd_device_type(struct device *dev) return ND_DEVICE_REGION_PMEM; else if (is_nd_blk(dev)) return ND_DEVICE_REGION_BLK; + else if (is_nd_dax(dev)) + return ND_DEVICE_DAX_PMEM; else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent)) return nd_region_to_nstype(to_nd_region(dev->parent)); @@ -246,6 +248,8 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie) void __nd_device_register(struct device *dev) { + if (!dev) + return; dev->bus = &nvdimm_bus_type; get_device(dev); async_schedule_domain(nd_async_device_register, dev, diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 6bbd0a36994a..5f53db59a058 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -85,6 +85,8 @@ static bool is_idle(struct device *dev, struct nd_namespace_common *ndns) seed = nd_region->btt_seed; else if (is_nd_pfn(dev)) seed = nd_region->pfn_seed; + else if (is_nd_dax(dev)) + seed = nd_region->dax_seed; if (seed == dev || ndns || dev->driver) return false; diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c new file mode 100644 index 000000000000..f90f7549e7f4 --- /dev/null +++ b/drivers/nvdimm/dax_devs.c @@ -0,0 +1,99 @@ +/* + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include "nd-core.h" +#include "nd.h" + +static void nd_dax_release(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_dax *nd_dax = to_nd_dax(dev); + struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; + + dev_dbg(dev, "%s\n", __func__); + nd_detach_ndns(dev, &nd_pfn->ndns); + ida_simple_remove(&nd_region->dax_ida, nd_pfn->id); + kfree(nd_pfn->uuid); + kfree(nd_dax); +} + +static struct device_type nd_dax_device_type = { + .name = "nd_dax", + .release = nd_dax_release, +}; + +bool is_nd_dax(struct device *dev) +{ + return dev ? dev->type == &nd_dax_device_type : false; +} +EXPORT_SYMBOL(is_nd_dax); + +struct nd_dax *to_nd_dax(struct device *dev) +{ + struct nd_dax *nd_dax = container_of(dev, struct nd_dax, nd_pfn.dev); + + WARN_ON(!is_nd_dax(dev)); + return nd_dax; +} +EXPORT_SYMBOL(to_nd_dax); + +static const struct attribute_group *nd_dax_attribute_groups[] = { + &nd_pfn_attribute_group, + &nd_device_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +static struct nd_dax *nd_dax_alloc(struct nd_region *nd_region) +{ + struct nd_pfn *nd_pfn; + struct nd_dax *nd_dax; + struct device *dev; + + nd_dax = kzalloc(sizeof(*nd_dax), GFP_KERNEL); + if (!nd_dax) + return NULL; + + nd_pfn = &nd_dax->nd_pfn; + nd_pfn->id = ida_simple_get(&nd_region->dax_ida, 0, 0, GFP_KERNEL); + if (nd_pfn->id < 0) { + kfree(nd_dax); + return NULL; + } + + dev = &nd_pfn->dev; + dev_set_name(dev, "dax%d.%d", nd_region->id, nd_pfn->id); + dev->groups = nd_dax_attribute_groups; + dev->type = &nd_dax_device_type; + dev->parent = &nd_region->dev; + + return nd_dax; +} + +struct device *nd_dax_create(struct nd_region *nd_region) +{ + struct device *dev = NULL; + struct nd_dax *nd_dax; + + if (!is_nd_pmem(&nd_region->dev)) + return NULL; + + nd_dax = nd_dax_alloc(nd_region); + if (nd_dax) + dev = nd_pfn_devinit(&nd_dax->nd_pfn, NULL); + __nd_device_register(dev); + return dev; +} diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index e5ad5162bf34..c5e3196c45b0 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1288,6 +1288,8 @@ static ssize_t mode_show(struct device *dev, mode = "safe"; else if (claim && is_nd_pfn(claim)) mode = "memory"; + else if (claim && is_nd_dax(claim)) + mode = "dax"; else if (!claim && pmem_should_map_pages(dev)) mode = "memory"; else @@ -1379,14 +1381,17 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) { struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL; struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL; + struct nd_dax *nd_dax = is_nd_dax(dev) ? to_nd_dax(dev) : NULL; struct nd_namespace_common *ndns = NULL; resource_size_t size; - if (nd_btt || nd_pfn) { + if (nd_btt || nd_pfn || nd_dax) { if (nd_btt) ndns = nd_btt->ndns; else if (nd_pfn) ndns = nd_pfn->ndns; + else if (nd_dax) + ndns = nd_dax->nd_pfn.ndns; if (!ndns) return ERR_PTR(-ENODEV); @@ -1779,6 +1784,18 @@ void nd_region_create_blk_seed(struct nd_region *nd_region) nd_device_register(nd_region->ns_seed); } +void nd_region_create_dax_seed(struct nd_region *nd_region) +{ + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + nd_region->dax_seed = nd_dax_create(nd_region); + /* + * Seed creation failures are not fatal, provisioning is simply + * disabled until memory becomes available + */ + if (!nd_region->dax_seed) + dev_err(&nd_region->dev, "failed to create dax namespace\n"); +} + void nd_region_create_pfn_seed(struct nd_region *nd_region) { WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 1d1500f3d8b5..cb65308c0329 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -54,6 +54,7 @@ struct nd_region; void nd_region_create_blk_seed(struct nd_region *nd_region); void nd_region_create_btt_seed(struct nd_region *nd_region); void nd_region_create_pfn_seed(struct nd_region *nd_region); +void nd_region_create_dax_seed(struct nd_region *nd_region); void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev); int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 6c36509662e4..46910b8f32b1 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -101,10 +101,12 @@ struct nd_region { struct ida ns_ida; struct ida btt_ida; struct ida pfn_ida; + struct ida dax_ida; unsigned long flags; struct device *ns_seed; struct device *btt_seed; struct device *pfn_seed; + struct device *dax_seed; u16 ndr_mappings; u64 ndr_size; u64 ndr_start; @@ -161,6 +163,10 @@ struct nd_pfn { struct nd_namespace_common *ndns; }; +struct nd_dax { + struct nd_pfn nd_pfn; +}; + enum nd_async_mode { ND_SYNC, ND_ASYNC, @@ -224,7 +230,10 @@ struct nd_pfn *to_nd_pfn(struct device *dev); int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_pfn(struct device *dev); struct device *nd_pfn_create(struct nd_region *nd_region); +struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, + struct nd_namespace_common *ndns); int nd_pfn_validate(struct nd_pfn *nd_pfn); +extern struct attribute_group nd_pfn_attribute_group; #else static inline int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) @@ -248,6 +257,22 @@ static inline int nd_pfn_validate(struct nd_pfn *nd_pfn) } #endif +struct nd_dax *to_nd_dax(struct device *dev); +#if IS_ENABLED(CONFIG_NVDIMM_DAX) +bool is_nd_dax(struct device *dev); +struct device *nd_dax_create(struct nd_region *nd_region); +#else +static inline bool is_nd_dax(struct device *dev) +{ + return false; +} + +static inline struct device *nd_dax_create(struct nd_region *nd_region) +{ + return NULL; +} +#endif + struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index e8693fe65e49..6ade2eb7615d 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -54,10 +54,29 @@ struct nd_pfn *to_nd_pfn(struct device *dev) } EXPORT_SYMBOL(to_nd_pfn); +static struct nd_pfn *to_nd_pfn_safe(struct device *dev) +{ + /* + * pfn device attributes are re-used by dax device instances, so we + * need to be careful to correct device-to-nd_pfn conversion. + */ + if (is_nd_pfn(dev)) + return to_nd_pfn(dev); + + if (is_nd_dax(dev)) { + struct nd_dax *nd_dax = to_nd_dax(dev); + + return &nd_dax->nd_pfn; + } + + WARN_ON(1); + return NULL; +} + static ssize_t mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); switch (nd_pfn->mode) { case PFN_MODE_RAM: @@ -72,7 +91,7 @@ static ssize_t mode_show(struct device *dev, static ssize_t mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc = 0; device_lock(dev); @@ -106,7 +125,7 @@ static DEVICE_ATTR_RW(mode); static ssize_t align_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); return sprintf(buf, "%lx\n", nd_pfn->align); } @@ -134,7 +153,7 @@ static ssize_t __align_store(struct nd_pfn *nd_pfn, const char *buf) static ssize_t align_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -152,7 +171,7 @@ static DEVICE_ATTR_RW(align); static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); if (nd_pfn->uuid) return sprintf(buf, "%pUb\n", nd_pfn->uuid); @@ -162,7 +181,7 @@ static ssize_t uuid_show(struct device *dev, static ssize_t uuid_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -178,7 +197,7 @@ static DEVICE_ATTR_RW(uuid); static ssize_t namespace_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; nvdimm_bus_lock(dev); @@ -191,7 +210,7 @@ static ssize_t namespace_show(struct device *dev, static ssize_t namespace_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -209,7 +228,7 @@ static DEVICE_ATTR_RW(namespace); static ssize_t resource_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -235,7 +254,7 @@ static DEVICE_ATTR_RO(resource); static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -270,7 +289,7 @@ static struct attribute *nd_pfn_attributes[] = { NULL, }; -static struct attribute_group nd_pfn_attribute_group = { +struct attribute_group nd_pfn_attribute_group = { .attrs = nd_pfn_attributes, }; @@ -281,15 +300,31 @@ static const struct attribute_group *nd_pfn_attribute_groups[] = { NULL, }; -static struct device *__nd_pfn_create(struct nd_region *nd_region, +struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, struct nd_namespace_common *ndns) { - struct nd_pfn *nd_pfn; - struct device *dev; + struct device *dev = &nd_pfn->dev; - /* we can only create pages for contiguous ranged of pmem */ - if (!is_nd_pmem(&nd_region->dev)) + if (!nd_pfn) + return NULL; + + nd_pfn->mode = PFN_MODE_NONE; + nd_pfn->align = HPAGE_SIZE; + dev = &nd_pfn->dev; + device_initialize(&nd_pfn->dev); + if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { + dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", + __func__, dev_name(ndns->claim)); + put_device(dev); return NULL; + } + return dev; +} + +static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region) +{ + struct nd_pfn *nd_pfn; + struct device *dev; nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); if (!nd_pfn) @@ -301,29 +336,27 @@ static struct device *__nd_pfn_create(struct nd_region *nd_region, return NULL; } - nd_pfn->mode = PFN_MODE_NONE; - nd_pfn->align = HPAGE_SIZE; dev = &nd_pfn->dev; dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); - dev->parent = &nd_region->dev; - dev->type = &nd_pfn_device_type; dev->groups = nd_pfn_attribute_groups; - device_initialize(&nd_pfn->dev); - if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { - dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", - __func__, dev_name(ndns->claim)); - put_device(dev); - return NULL; - } - return dev; + dev->type = &nd_pfn_device_type; + dev->parent = &nd_region->dev; + + return nd_pfn; } struct device *nd_pfn_create(struct nd_region *nd_region) { - struct device *dev = __nd_pfn_create(nd_region, NULL); + struct nd_pfn *nd_pfn; + struct device *dev; + + if (!is_nd_pmem(&nd_region->dev)) + return NULL; + + nd_pfn = nd_pfn_alloc(nd_region); + dev = nd_pfn_devinit(nd_pfn, NULL); - if (dev) - __nd_device_register(dev); + __nd_device_register(dev); return dev; } @@ -423,7 +456,8 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) return -ENODEV; nvdimm_bus_lock(&ndns->dev); - pfn_dev = __nd_pfn_create(nd_region, ndns); + nd_pfn = nd_pfn_alloc(nd_region); + pfn_dev = nd_pfn_devinit(nd_pfn, ndns); nvdimm_bus_unlock(&ndns->dev); if (!pfn_dev) return -ENOMEM; diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 4b7715e29cff..05a912359939 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -54,6 +54,7 @@ static int nd_region_probe(struct device *dev) nd_region->btt_seed = nd_btt_create(nd_region); nd_region->pfn_seed = nd_pfn_create(nd_region); + nd_region->dax_seed = nd_dax_create(nd_region); if (err == 0) return 0; @@ -86,6 +87,7 @@ static int nd_region_remove(struct device *dev) nd_region->ns_seed = NULL; nd_region->btt_seed = NULL; nd_region->pfn_seed = NULL; + nd_region->dax_seed = NULL; dev_set_drvdata(dev, NULL); nvdimm_bus_unlock(dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 139bf71ca549..9e1b054e0e61 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -306,6 +306,23 @@ static ssize_t pfn_seed_show(struct device *dev, } static DEVICE_ATTR_RO(pfn_seed); +static ssize_t dax_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->dax_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->dax_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(dax_seed); + static ssize_t read_only_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -335,6 +352,7 @@ static struct attribute *nd_region_attributes[] = { &dev_attr_mappings.attr, &dev_attr_btt_seed.attr, &dev_attr_pfn_seed.attr, + &dev_attr_dax_seed.attr, &dev_attr_read_only.attr, &dev_attr_set_cookie.attr, &dev_attr_available_size.attr, @@ -353,6 +371,9 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) if (!is_nd_pmem(dev) && a == &dev_attr_pfn_seed.attr) return 0; + if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr) + return 0; + if (a != &dev_attr_set_cookie.attr && a != &dev_attr_available_size.attr) return a->mode; @@ -441,6 +462,13 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, nd_region_create_pfn_seed(nd_region); nvdimm_bus_unlock(dev); } + if (is_nd_dax(dev) && probe) { + nd_region = to_nd_region(dev->parent); + nvdimm_bus_lock(dev); + if (nd_region->dax_seed == dev) + nd_region_create_dax_seed(nd_region); + nvdimm_bus_unlock(dev); + } } void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev) @@ -718,6 +746,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, ida_init(&nd_region->ns_ida); ida_init(&nd_region->btt_ida); ida_init(&nd_region->pfn_ida); + ida_init(&nd_region->dax_ida); dev = &nd_region->dev; dev_set_name(dev, "region%d", nd_region->id); dev->parent = &nvdimm_bus->dev; diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 7cc28ab05b87..4f29d247f709 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -206,6 +206,7 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_DEVICE_NAMESPACE_IO 4 /* legacy persistent memory */ #define ND_DEVICE_NAMESPACE_PMEM 5 /* PMEM namespace (may alias with BLK) */ #define ND_DEVICE_NAMESPACE_BLK 6 /* BLK namespace (may alias with PMEM) */ +#define ND_DEVICE_DAX_PMEM 7 /* Device DAX interface to pmem */ enum nd_driver_flags { ND_DRIVER_DIMM = 1 << ND_DEVICE_DIMM, @@ -214,6 +215,7 @@ enum nd_driver_flags { ND_DRIVER_NAMESPACE_IO = 1 << ND_DEVICE_NAMESPACE_IO, ND_DRIVER_NAMESPACE_PMEM = 1 << ND_DEVICE_NAMESPACE_PMEM, ND_DRIVER_NAMESPACE_BLK = 1 << ND_DEVICE_NAMESPACE_BLK, + ND_DRIVER_DAX_PMEM = 1 << ND_DEVICE_DAX_PMEM, }; enum { diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index d5bc8c080b44..5ff6d3c126a9 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -50,6 +50,7 @@ libnvdimm-y += $(NVDIMM_SRC)/label.o libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o +libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o libnvdimm-y += config_check.o obj-m += test/ -- cgit From 438517ec78fa4e9db9892e749ed50e2a21f5f17f Mon Sep 17 00:00:00 2001 From: Chris Smart Date: Mon, 2 May 2016 13:51:38 +1000 Subject: selftests/powerpc: Test cp_abort during context switch Test that performing a copy paste sequence in userspace on P9 does not result in a leak of the copy into the paste of another process. This is based on Anton Blanchard's context_switch benchmarking code. It sets up two processes tied to the same CPU, one which copies and one which pastes. The paste should never succeed and the test fails if it does. This is a test for commit, "8a64904 powerpc: Add support for userspace P9 copy paste." Patch created with much assistance from Michael Neuling Signed-off-by: Chris Smart Reviewed-by: Cyril Bur Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/Makefile | 1 + .../selftests/powerpc/context_switch/.gitignore | 1 + .../selftests/powerpc/context_switch/Makefile | 10 ++ .../selftests/powerpc/context_switch/cp_abort.c | 110 +++++++++++++++++++++ tools/testing/selftests/powerpc/utils.h | 7 ++ 5 files changed, 129 insertions(+) create mode 100644 tools/testing/selftests/powerpc/context_switch/.gitignore create mode 100644 tools/testing/selftests/powerpc/context_switch/Makefile create mode 100644 tools/testing/selftests/powerpc/context_switch/cp_abort.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index b08f77cbe31b..4ca83fe80654 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -14,6 +14,7 @@ export CFLAGS SUB_DIRS = benchmarks \ copyloops \ + context_switch \ dscr \ mm \ pmu \ diff --git a/tools/testing/selftests/powerpc/context_switch/.gitignore b/tools/testing/selftests/powerpc/context_switch/.gitignore new file mode 100644 index 000000000000..c1431af7b51c --- /dev/null +++ b/tools/testing/selftests/powerpc/context_switch/.gitignore @@ -0,0 +1 @@ +cp_abort diff --git a/tools/testing/selftests/powerpc/context_switch/Makefile b/tools/testing/selftests/powerpc/context_switch/Makefile new file mode 100644 index 000000000000..e164d1466466 --- /dev/null +++ b/tools/testing/selftests/powerpc/context_switch/Makefile @@ -0,0 +1,10 @@ +TEST_PROGS := cp_abort + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c ../utils.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/context_switch/cp_abort.c b/tools/testing/selftests/powerpc/context_switch/cp_abort.c new file mode 100644 index 000000000000..5a5b55afda0e --- /dev/null +++ b/tools/testing/selftests/powerpc/context_switch/cp_abort.c @@ -0,0 +1,110 @@ +/* + * Adapted from Anton Blanchard's context switch microbenchmark. + * + * Copyright 2009, Anton Blanchard, IBM Corporation. + * Copyright 2016, Mikey Neuling, Chris Smart, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This program tests the copy paste abort functionality of a P9 + * (or later) by setting up two processes on the same CPU, one + * which executes the copy instruction and the other which + * executes paste. + * + * The paste instruction should never succeed, as the cp_abort + * instruction is called by the kernel during a context switch. + * + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include "utils.h" +#include + +#define READ_FD 0 +#define WRITE_FD 1 + +#define NUM_LOOPS 1000 + +/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 4.4. */ +#define PASTE(RA, RB, L, RC) \ + .long (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) | (RC) << (31-31)) + +int paste(void *i) +{ + int cr; + + asm volatile(str(PASTE(0, %1, 1, 1))";" + "mfcr %0;" + : "=r" (cr) + : "b" (i) + : "memory" + ); + return cr; +} + +/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 4.4. */ +#define COPY(RA, RB, L) \ + .long (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10)) + +void copy(void *i) +{ + asm volatile(str(COPY(0, %0, 1))";" + : + : "b" (i) + : "memory" + ); +} + +int test_cp_abort(void) +{ + /* 128 bytes for a full cache line */ + char buf[128] __cacheline_aligned; + cpu_set_t cpuset; + int fd1[2], fd2[2], pid; + char c; + + /* only run this test on a P9 or later */ + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00)); + + /* + * Run both processes on the same CPU, so that copy is more likely + * to leak into a paste. + */ + CPU_ZERO(&cpuset); + CPU_SET(pick_online_cpu(), &cpuset); + FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset)); + + FAIL_IF(pipe(fd1) || pipe(fd2)); + + pid = fork(); + FAIL_IF(pid < 0); + + if (!pid) { + for (int i = 0; i < NUM_LOOPS; i++) { + FAIL_IF((write(fd1[WRITE_FD], &c, 1)) != 1); + FAIL_IF((read(fd2[READ_FD], &c, 1)) != 1); + /* A paste succeeds if CR0 EQ bit is set */ + FAIL_IF(paste(buf) & 0x20000000); + } + } else { + for (int i = 0; i < NUM_LOOPS; i++) { + FAIL_IF((read(fd1[READ_FD], &c, 1)) != 1); + copy(buf); + FAIL_IF((write(fd2[WRITE_FD], &c, 1) != 1)); + } + } + return 0; + +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_cp_abort, "cp_abort"); +} diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h index 175ac6ad10dd..3b370deafb62 100644 --- a/tools/testing/selftests/powerpc/utils.h +++ b/tools/testing/selftests/powerpc/utils.h @@ -6,6 +6,8 @@ #ifndef _SELFTESTS_POWERPC_UTILS_H #define _SELFTESTS_POWERPC_UTILS_H +#define __cacheline_aligned __attribute__((aligned(128))) + #include #include #include @@ -54,4 +56,9 @@ do { \ #define _str(s) #s #define str(s) _str(s) +/* POWER9 feature */ +#ifndef PPC_FEATURE2_ARCH_3_00 +#define PPC_FEATURE2_ARCH_3_00 0x00800000 +#endif + #endif /* _SELFTESTS_POWERPC_UTILS_H */ -- cgit From 2f67798c1f9f35ba576ac0639b9b648b9b2033f6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 2 May 2016 14:54:29 +1000 Subject: selftests/powerpc: Fix subpage_prot test to return !0 on failure It's helpful for automated testing if the test returns error codes back to the calling program. While we're here fix all the usages of %p to remove the double 0x, ie. %p already includes 0x. Signed-off-by: Michael Ellerman Reviewed-by: Aneesh Kumar K.V --- tools/testing/selftests/powerpc/mm/subpage_prot.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c index 440180ff8089..35ade7406dcd 100644 --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -73,7 +73,7 @@ static inline void check_faulted(void *addr, long page, long subpage, int write) want_fault |= (subpage == ((page + 1) % 16)); if (faulted != want_fault) { - printf("Failed at 0x%p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n", + printf("Failed at %p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n", addr, page, subpage, write, want_fault ? "fault" : "pass", faulted ? "fault" : "pass"); @@ -82,7 +82,7 @@ static inline void check_faulted(void *addr, long page, long subpage, int write) if (faulted) { if (dar != addr) { - printf("Fault expected at 0x%p and happened at 0x%p !\n", + printf("Fault expected at %p and happened at %p !\n", addr, dar); } faulted = 0; @@ -162,7 +162,7 @@ int test_anon(void) mallocblock = (void *)align; - printf("allocated malloc block of 0x%lx bytes at 0x%p\n", + printf("allocated malloc block of 0x%lx bytes at %p\n", mallocsize, mallocblock); printf("testing malloc block...\n"); @@ -197,7 +197,7 @@ int test_file(void) perror("failed to map file"); return 1; } - printf("allocated %s for 0x%lx bytes at 0x%p\n", + printf("allocated %s for 0x%lx bytes at %p\n", file_name, filesize, fileblock); printf("testing file map...\n"); @@ -207,14 +207,16 @@ int test_file(void) int main(int argc, char *argv[]) { - test_harness(test_anon, "subpage_prot_anon"); + int rc; + + rc = test_harness(test_anon, "subpage_prot_anon"); + if (rc) + return rc; if (argc > 1) file_name = argv[1]; else file_name = "tempfile"; - test_harness(test_file, "subpage_prot_file"); - - return 0; + return test_harness(test_file, "subpage_prot_file"); } -- cgit From c4522469e6418fb547fbcd9d3aeb73bea0f6f859 Mon Sep 17 00:00:00 2001 From: Chandan Kumar Date: Thu, 28 Apr 2016 15:01:09 +0530 Subject: perf/powerpc: Add support for unwinding perf-stackdump Adds support for unwinding user stack dump by linking with libunwind. Signed-off-by: Chandan Kumar Reviewed-by: Naveen N. Rao Tested-by: Naveen N. Rao Acked-by: Arnaldo Carvalho de Melo Signed-off-by: Michael Ellerman --- tools/perf/arch/powerpc/util/Build | 1 + tools/perf/arch/powerpc/util/unwind-libunwind.c | 96 +++++++++++++++++++++++++ tools/perf/config/Makefile | 1 + 3 files changed, 98 insertions(+) create mode 100644 tools/perf/arch/powerpc/util/unwind-libunwind.c (limited to 'tools') diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 9ee335016592..90ad64b231cd 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -5,3 +5,4 @@ libperf-y += perf_regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/powerpc/util/unwind-libunwind.c b/tools/perf/arch/powerpc/util/unwind-libunwind.c new file mode 100644 index 000000000000..9e15f92ae49f --- /dev/null +++ b/tools/perf/arch/powerpc/util/unwind-libunwind.c @@ -0,0 +1,96 @@ +/* + * Copyright 2016 Chandan Kumar, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include "../../util/unwind.h" +#include "../../util/debug.h" + +int libunwind__arch_reg_id(int regnum) +{ + switch (regnum) { + case UNW_PPC64_R0: + return PERF_REG_POWERPC_R0; + case UNW_PPC64_R1: + return PERF_REG_POWERPC_R1; + case UNW_PPC64_R2: + return PERF_REG_POWERPC_R2; + case UNW_PPC64_R3: + return PERF_REG_POWERPC_R3; + case UNW_PPC64_R4: + return PERF_REG_POWERPC_R4; + case UNW_PPC64_R5: + return PERF_REG_POWERPC_R5; + case UNW_PPC64_R6: + return PERF_REG_POWERPC_R6; + case UNW_PPC64_R7: + return PERF_REG_POWERPC_R7; + case UNW_PPC64_R8: + return PERF_REG_POWERPC_R8; + case UNW_PPC64_R9: + return PERF_REG_POWERPC_R9; + case UNW_PPC64_R10: + return PERF_REG_POWERPC_R10; + case UNW_PPC64_R11: + return PERF_REG_POWERPC_R11; + case UNW_PPC64_R12: + return PERF_REG_POWERPC_R12; + case UNW_PPC64_R13: + return PERF_REG_POWERPC_R13; + case UNW_PPC64_R14: + return PERF_REG_POWERPC_R14; + case UNW_PPC64_R15: + return PERF_REG_POWERPC_R15; + case UNW_PPC64_R16: + return PERF_REG_POWERPC_R16; + case UNW_PPC64_R17: + return PERF_REG_POWERPC_R17; + case UNW_PPC64_R18: + return PERF_REG_POWERPC_R18; + case UNW_PPC64_R19: + return PERF_REG_POWERPC_R19; + case UNW_PPC64_R20: + return PERF_REG_POWERPC_R20; + case UNW_PPC64_R21: + return PERF_REG_POWERPC_R21; + case UNW_PPC64_R22: + return PERF_REG_POWERPC_R22; + case UNW_PPC64_R23: + return PERF_REG_POWERPC_R23; + case UNW_PPC64_R24: + return PERF_REG_POWERPC_R24; + case UNW_PPC64_R25: + return PERF_REG_POWERPC_R25; + case UNW_PPC64_R26: + return PERF_REG_POWERPC_R26; + case UNW_PPC64_R27: + return PERF_REG_POWERPC_R27; + case UNW_PPC64_R28: + return PERF_REG_POWERPC_R28; + case UNW_PPC64_R29: + return PERF_REG_POWERPC_R29; + case UNW_PPC64_R30: + return PERF_REG_POWERPC_R30; + case UNW_PPC64_R31: + return PERF_REG_POWERPC_R31; + case UNW_PPC64_LR: + return PERF_REG_POWERPC_LINK; + case UNW_PPC64_CTR: + return PERF_REG_POWERPC_CTR; + case UNW_PPC64_XER: + return PERF_REG_POWERPC_XER; + case UNW_PPC64_NIP: + return PERF_REG_POWERPC_NIP; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + return -EINVAL; +} diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index eda1cfbbfb2d..220536cf2b59 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -26,6 +26,7 @@ NO_PERF_REGS := 1 # Additional ARCH settings for ppc ifeq ($(ARCH),powerpc) NO_PERF_REGS := 0 + LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 endif # Additional ARCH settings for x86 -- cgit From f47822078dece7189cad0a5f472f148e5e916736 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Thu, 28 Apr 2016 15:01:10 +0530 Subject: perf tools: Fix perf regs mask generation On some architectures (powerpc in particular), the number of registers exceeds what can be represented in an integer bitmask. Ensure we generate the proper bitmask on such platforms. Fixes: 71ad0f5e4 ("perf tools: Support for DWARF CFI unwinding on post processing") Signed-off-by: Naveen N. Rao Acked-by: Arnaldo Carvalho de Melo Signed-off-by: Michael Ellerman --- tools/perf/util/perf_regs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 6b8eb13e14e4..c4023f22f287 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -12,18 +12,18 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) int i, idx = 0; u64 mask = regs->mask; - if (regs->cache_mask & (1 << id)) + if (regs->cache_mask & (1ULL << id)) goto out; - if (!(mask & (1 << id))) + if (!(mask & (1ULL << id))) return -EINVAL; for (i = 0; i < id; i++) { - if (mask & (1 << i)) + if (mask & (1ULL << i)) idx++; } - regs->cache_mask |= (1 << id); + regs->cache_mask |= (1ULL << id); regs->cache_regs[id] = regs->regs[idx]; out: -- cgit From 2d59b3b25659463a24f05df367574d90b3cd7145 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Wed, 23 Dec 2015 16:49:50 +1100 Subject: selftests/powerpc: Make reg.h common to all powerpc selftests Currently there is a reg.h in pmu/ebb that has defines that are useful in other powerpc selftests so move this up into selftests/powerpc folder. Also include in utils.h - as this is often used in self tests. Add in some other useful register defines. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/pmu/ebb/ebb.c | 1 - tools/testing/selftests/powerpc/pmu/ebb/reg.h | 49 ------------------- .../selftests/powerpc/pmu/ebb/reg_access_test.c | 1 - tools/testing/selftests/powerpc/reg.h | 55 ++++++++++++++++++++++ tools/testing/selftests/powerpc/utils.h | 1 + 5 files changed, 56 insertions(+), 51 deletions(-) delete mode 100644 tools/testing/selftests/powerpc/pmu/ebb/reg.h create mode 100644 tools/testing/selftests/powerpc/reg.h (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c index e67452f1bcff..46681fec549b 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c @@ -15,7 +15,6 @@ #include #include "trace.h" -#include "reg.h" #include "ebb.h" diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg.h b/tools/testing/selftests/powerpc/pmu/ebb/reg.h deleted file mode 100644 index 5921b0dfe2e9..000000000000 --- a/tools/testing/selftests/powerpc/pmu/ebb/reg.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2014, Michael Ellerman, IBM Corp. - * Licensed under GPLv2. - */ - -#ifndef _SELFTESTS_POWERPC_REG_H -#define _SELFTESTS_POWERPC_REG_H - -#define __stringify_1(x) #x -#define __stringify(x) __stringify_1(x) - -#define mfspr(rn) ({unsigned long rval; \ - asm volatile("mfspr %0," __stringify(rn) \ - : "=r" (rval)); rval; }) -#define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \ - : "r" ((unsigned long)(v)) \ - : "memory") - -#define mb() asm volatile("sync" : : : "memory"); - -#define SPRN_MMCR2 769 -#define SPRN_MMCRA 770 -#define SPRN_MMCR0 779 -#define MMCR0_PMAO 0x00000080 -#define MMCR0_PMAE 0x04000000 -#define MMCR0_FC 0x80000000 -#define SPRN_EBBHR 804 -#define SPRN_EBBRR 805 -#define SPRN_BESCR 806 /* Branch event status & control register */ -#define SPRN_BESCRS 800 /* Branch event status & control set (1 bits set to 1) */ -#define SPRN_BESCRSU 801 /* Branch event status & control set upper */ -#define SPRN_BESCRR 802 /* Branch event status & control REset (1 bits set to 0) */ -#define SPRN_BESCRRU 803 /* Branch event status & control REset upper */ - -#define BESCR_PMEO 0x1 /* PMU Event-based exception Occurred */ -#define BESCR_PME (0x1ul << 32) /* PMU Event-based exception Enable */ - -#define SPRN_PMC1 771 -#define SPRN_PMC2 772 -#define SPRN_PMC3 773 -#define SPRN_PMC4 774 -#define SPRN_PMC5 775 -#define SPRN_PMC6 776 - -#define SPRN_SIAR 780 -#define SPRN_SDAR 781 -#define SPRN_SIER 768 - -#endif /* _SELFTESTS_POWERPC_REG_H */ diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c index 5b1188f10c15..f923228bca22 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c @@ -7,7 +7,6 @@ #include #include "ebb.h" -#include "reg.h" /* diff --git a/tools/testing/selftests/powerpc/reg.h b/tools/testing/selftests/powerpc/reg.h new file mode 100644 index 000000000000..65bfdeeebdee --- /dev/null +++ b/tools/testing/selftests/powerpc/reg.h @@ -0,0 +1,55 @@ +/* + * Copyright 2014, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#ifndef _SELFTESTS_POWERPC_REG_H +#define _SELFTESTS_POWERPC_REG_H + +#define __stringify_1(x) #x +#define __stringify(x) __stringify_1(x) + +#define mfspr(rn) ({unsigned long rval; \ + asm volatile("mfspr %0," _str(rn) \ + : "=r" (rval)); rval; }) +#define mtspr(rn, v) asm volatile("mtspr " _str(rn) ",%0" : \ + : "r" ((unsigned long)(v)) \ + : "memory") + +#define mb() asm volatile("sync" : : : "memory"); + +#define SPRN_MMCR2 769 +#define SPRN_MMCRA 770 +#define SPRN_MMCR0 779 +#define MMCR0_PMAO 0x00000080 +#define MMCR0_PMAE 0x04000000 +#define MMCR0_FC 0x80000000 +#define SPRN_EBBHR 804 +#define SPRN_EBBRR 805 +#define SPRN_BESCR 806 /* Branch event status & control register */ +#define SPRN_BESCRS 800 /* Branch event status & control set (1 bits set to 1) */ +#define SPRN_BESCRSU 801 /* Branch event status & control set upper */ +#define SPRN_BESCRR 802 /* Branch event status & control REset (1 bits set to 0) */ +#define SPRN_BESCRRU 803 /* Branch event status & control REset upper */ + +#define BESCR_PMEO 0x1 /* PMU Event-based exception Occurred */ +#define BESCR_PME (0x1ul << 32) /* PMU Event-based exception Enable */ + +#define SPRN_PMC1 771 +#define SPRN_PMC2 772 +#define SPRN_PMC3 773 +#define SPRN_PMC4 774 +#define SPRN_PMC5 775 +#define SPRN_PMC6 776 + +#define SPRN_SIAR 780 +#define SPRN_SDAR 781 +#define SPRN_SIER 768 + +#define SPRN_TEXASR 0x82 +#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ +#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ +#define TEXASR_FS 0x08000000 +#define SPRN_TAR 0x32f + +#endif /* _SELFTESTS_POWERPC_REG_H */ diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h index 3b370deafb62..a985cfaa535e 100644 --- a/tools/testing/selftests/powerpc/utils.h +++ b/tools/testing/selftests/powerpc/utils.h @@ -11,6 +11,7 @@ #include #include #include +#include "reg.h" /* Avoid headaches with PRI?64 - just use %ll? always */ typedef unsigned long long u64; -- cgit From da3ddc3b5fea695f7b2fa89c4ca17dfd529293d2 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Wed, 23 Dec 2015 16:49:51 +1100 Subject: selftests/powerpc: Standardise TM calls Currently tbegin, tend etc are written as opcodes or asm instructions. So standardise these to asm instructions. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/tm-resched-dscr.c | 16 +++++----------- tools/testing/selftests/powerpc/tm/tm-signal-stack.c | 4 ++-- 2 files changed, 7 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c index 8fde93d6021f..d9c49f41515e 100644 --- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c +++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c @@ -31,12 +31,6 @@ #include "utils.h" #include "tm.h" -#define TBEGIN ".long 0x7C00051D ;" -#define TEND ".long 0x7C00055D ;" -#define TCHECK ".long 0x7C00059C ;" -#define TSUSPEND ".long 0x7C0005DD ;" -#define TRESUME ".long 0x7C2005DD ;" -#define SPRN_TEXASR 0x82 #define SPRN_DSCR 0x03 int test_body(void) @@ -55,13 +49,13 @@ int test_body(void) "mtspr %[sprn_dscr], 3;" /* start and suspend a transaction */ - TBEGIN + "tbegin.;" "beq 1f;" - TSUSPEND + "tsuspend.;" /* hard loop until the transaction becomes doomed */ "2: ;" - TCHECK + "tcheck 0;" "bc 4, 0, 2b;" /* record DSCR and TEXASR */ @@ -70,8 +64,8 @@ int test_body(void) "mfspr 3, %[sprn_texasr];" "std 3, %[texasr];" - TRESUME - TEND + "tresume.;" + "tend.;" "li %[rv], 0;" "1: ;" : [rv]"=r"(rv), [dscr2]"=m"(dscr2), [texasr]"=m"(texasr) diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c index e44a238c1d77..1f0eb567438d 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c @@ -60,9 +60,9 @@ int tm_signal_stack() exit(1); asm volatile("li 1, 0 ;" /* stack ptr == NULL */ "1:" - ".long 0x7C00051D ;" /* tbegin */ + "tbegin.;" "beq 1b ;" /* retry forever */ - ".long 0x7C0005DD ; ;" /* tsuspend */ + "tsuspend.;" "ld 2, 0(1) ;" /* trigger segv" */ : : : "memory"); -- cgit From d95be4ca3e457044be55c8b1c1b5ac64f17d8a92 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Wed, 23 Dec 2015 16:49:52 +1100 Subject: selftests/powerpc: Add test for forking inside transaction This test does a fork syscall inside a transaction. Basic sniff test to see if we can enter the kernel during a transaction. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/.gitignore | 1 + tools/testing/selftests/powerpc/tm/Makefile | 2 +- tools/testing/selftests/powerpc/tm/tm-fork.c | 42 +++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/tm/tm-fork.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 7d0f14b8cb2e..d216eddcd493 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -3,3 +3,4 @@ tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy +tm-fork diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 737f72c964e6..2db475b12720 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,4 +1,4 @@ -TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy +TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork all: $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/tm/tm-fork.c b/tools/testing/selftests/powerpc/tm/tm-fork.c new file mode 100644 index 000000000000..8d48579b7778 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-fork.c @@ -0,0 +1,42 @@ +/* + * Copyright 2015, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * + * Edited: Rashmica Gupta, Nov 2015 + * + * This test does a fork syscall inside a transaction. Basic sniff test + * to see if we can enter the kernel during a transaction. + */ + +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tm.h" + +int test_fork(void) +{ + SKIP_IF(!have_htm()); + + asm __volatile__( + "tbegin.;" + "blt 1f; " + "li 0, 2;" /* fork syscall */ + "sc ;" + "tend.;" + "1: ;" + : : : "memory", "r0"); + /* If we reach here, we've passed. Otherwise we've probably crashed + * the kernel */ + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test_fork, "tm_fork"); +} -- cgit From dbccb4940c9c9c8c93fcf733445e96dd2d6890d6 Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Wed, 23 Dec 2015 16:49:53 +1100 Subject: selftests/powerpc: Add TM test to check if TAR is corrupted If the transaction is aborted, the TAR should be rolled back to the checkpointed value before the transaction began. The value written to the TAR when the transaction is suspended should only remain there if the transaction completes successfully. Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/.gitignore | 1 + tools/testing/selftests/powerpc/tm/Makefile | 2 +- tools/testing/selftests/powerpc/tm/tm-tar.c | 90 +++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/tm/tm-tar.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index d216eddcd493..2ac376410c70 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -4,3 +4,4 @@ tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork +tm-tar diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 2db475b12720..cb4b3bf57a4d 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,4 +1,4 @@ -TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork +TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork tm-tar all: $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/tm/tm-tar.c b/tools/testing/selftests/powerpc/tm/tm-tar.c new file mode 100644 index 000000000000..2d2fcc2b7a60 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-tar.c @@ -0,0 +1,90 @@ +/* + * Copyright 2015, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * Original: Michael Neuling 19/7/2013 + * Edited: Rashmica Gupta 01/12/2015 + * + * Do some transactions, see if the tar is corrupted. + * If the transaction is aborted, the TAR should be rolled back to the + * checkpointed value before the transaction began. The value written to + * TAR in suspended mode should only remain in TAR if the transaction + * completes. + */ + +#include +#include +#include +#include + +#include "tm.h" +#include "utils.h" + +int num_loops = 10000; + +int test_tar(void) +{ + int i; + + SKIP_IF(!have_htm()); + + for (i = 0; i < num_loops; i++) + { + uint64_t result = 0; + asm __volatile__( + "li 7, 1;" + "mtspr %[tar], 7;" /* tar = 1 */ + "tbegin.;" + "beq 3f;" + "li 4, 0x7000;" /* Loop lots, to use time */ + "2:;" /* Start loop */ + "li 7, 2;" + "mtspr %[tar], 7;" /* tar = 2 */ + "tsuspend.;" + "li 7, 3;" + "mtspr %[tar], 7;" /* tar = 3 */ + "tresume.;" + "subi 4, 4, 1;" + "cmpdi 4, 0;" + "bne 2b;" + "tend.;" + + /* Transaction sucess! TAR should be 3 */ + "mfspr 7, %[tar];" + "ori %[res], 7, 4;" // res = 3|4 = 7 + "b 4f;" + + /* Abort handler. TAR should be rolled back to 1 */ + "3:;" + "mfspr 7, %[tar];" + "ori %[res], 7, 8;" // res = 1|8 = 9 + "4:;" + + : [res]"=r"(result) + : [tar]"i"(SPRN_TAR) + : "memory", "r0", "r4", "r7"); + + /* If result is anything else other than 7 or 9, the tar + * value must have been corrupted. */ + if ((result != 7) && (result != 9)) + return 1; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + /* A low number of iterations (eg 100) can cause a false pass */ + if (argc > 1) { + if (strcmp(argv[1], "-h") == 0) { + printf("Syntax:\n\t%s []\n", + argv[0]); + return 1; + } else { + num_loops = atoi(argv[1]); + } + } + + printf("Starting, %d loops\n", num_loops); + + return test_harness(test_tar, "tm_tar"); +} -- cgit From 16aab321872400a4ce35b90ba40484fcb5d636ba Mon Sep 17 00:00:00 2001 From: Rashmica Gupta Date: Wed, 23 Dec 2015 16:49:54 +1100 Subject: selftests/powerpc: Add test to check if TM SPRs are corrupted Testing that the TM SPRs are behaving the way they should. Uses more threads than cpus to see if the following register values persist with context switching: - the FS (failure summary) flag in TEXASR - TFIAR and TFHAR Signed-off-by: Rashmica Gupta Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/tm/.gitignore | 1 + tools/testing/selftests/powerpc/tm/Makefile | 3 +- tools/testing/selftests/powerpc/tm/tm-tmspr.c | 143 ++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/tm/tm-tmspr.c (limited to 'tools') diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 2ac376410c70..bb942db845bf 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -5,3 +5,4 @@ tm-signal-stack tm-vmxcopy tm-fork tm-tar +tm-tmspr diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index cb4b3bf57a4d..d0505dbd22d5 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,4 +1,4 @@ -TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork tm-tar +TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy tm-fork tm-tar tm-tmspr all: $(TEST_PROGS) @@ -6,6 +6,7 @@ $(TEST_PROGS): ../harness.c ../utils.c tm-syscall: tm-syscall-asm.S tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include +tm-tmspr: CFLAGS += -pthread include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c new file mode 100644 index 000000000000..2bda81c7bf23 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c @@ -0,0 +1,143 @@ +/* + * Copyright 2015, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * + * Original: Michael Neuling 3/4/2014 + * Modified: Rashmica Gupta 8/12/2015 + * + * Check if any of the Transaction Memory SPRs get corrupted. + * - TFIAR - stores address of location of transaction failure + * - TFHAR - stores address of software failure handler (if transaction + * fails) + * - TEXASR - lots of info about the transacion(s) + * + * (1) create more threads than cpus + * (2) in each thread: + * (a) set TFIAR and TFHAR a unique value + * (b) loop for awhile, continually checking to see if + * either register has been corrupted. + * + * (3) Loop: + * (a) begin transaction + * (b) abort transaction + * (c) check TEXASR to see if FS has been corrupted + * + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tm.h" + +int num_loops = 10000; +int passed = 1; + +void tfiar_tfhar(void *in) +{ + int i, cpu; + unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + cpu = (unsigned long)in >> 1; + CPU_SET(cpu, &cpuset); + sched_setaffinity(0, sizeof(cpuset), &cpuset); + + /* TFIAR: Last bit has to be high so userspace can read register */ + tfiar = ((unsigned long)in) + 1; + tfiar += 2; + mtspr(SPRN_TFIAR, tfiar); + + /* TFHAR: Last two bits are reserved */ + tfhar = ((unsigned long)in); + tfhar &= ~0x3UL; + tfhar += 4; + mtspr(SPRN_TFHAR, tfhar); + + for (i = 0; i < num_loops; i++) { + tfhar_rd = mfspr(SPRN_TFHAR); + tfiar_rd = mfspr(SPRN_TFIAR); + if ( (tfhar != tfhar_rd) || (tfiar != tfiar_rd) ) { + passed = 0; + return; + } + } + return; +} + +void texasr(void *in) +{ + unsigned long i; + uint64_t result = 0; + + for (i = 0; i < num_loops; i++) { + asm __volatile__( + "tbegin.;" + "beq 3f ;" + "tabort. 0 ;" + "tend.;" + + /* Abort handler */ + "3: ;" + ::: "memory"); + + /* Check the TEXASR */ + result = mfspr(SPRN_TEXASR); + if ((result & TEXASR_FS) == 0) { + passed = 0; + return; + } + } + return; +} + +int test_tmspr() +{ + pthread_t thread; + int thread_num; + unsigned long i; + + SKIP_IF(!have_htm()); + + /* To cause some context switching */ + thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN); + + /* Test TFIAR and TFHAR */ + for (i = 0 ; i < thread_num ; i += 2){ + if (pthread_create(&thread, NULL, (void*)tfiar_tfhar, (void *)i)) + return EXIT_FAILURE; + } + if (pthread_join(thread, NULL) != 0) + return EXIT_FAILURE; + + /* Test TEXASR */ + for (i = 0 ; i < thread_num ; i++){ + if (pthread_create(&thread, NULL, (void*)texasr, (void *)i)) + return EXIT_FAILURE; + } + if (pthread_join(thread, NULL) != 0) + return EXIT_FAILURE; + + if (passed) + return 0; + else + return 1; +} + +int main(int argc, char *argv[]) +{ + if (argc > 1) { + if (strcmp(argv[1], "-h") == 0) { + printf("Syntax:\t []\n"); + return 0; + } else { + num_loops = atoi(argv[1]); + } + } + return test_harness(test_tmspr, "tm_tmspr"); +} -- cgit From 0ce105bf9723e9a2dc7ec0a1e164c1b63aa64546 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 29 Mar 2016 09:35:29 +0100 Subject: selftests/seccomp: add MIPS self-test support This adds self-test support on MIPS, based on RFC patch from Kees Cook. Modifications from the RFC: - support the O32 syscall which passes the real syscall number in a0. - Use PTRACE_{GET,SET}REGS - Because SYSCALL_NUM and SYSCALL_RET are the same register, it is not possible to test modifying the syscall return value when skipping, since both would need to set the same register. Therefore modify that test case to just detect the skipped test. Tested on MIPS32r2 / MIPS64r2 with O32, N32 and N64 userlands. Signed-off-by: Matt Redfearn Acked-by: Kees Cook Cc: Andy Lutomirski Cc: Shuah Khan Cc: Will Drewry Cc: IMG-MIPSLinuxKerneldevelopers@imgtec.com Cc: linux-kernel@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/12977/ Signed-off-by: Ralf Baechle --- tools/testing/selftests/seccomp/seccomp_bpf.c | 30 +++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 150829dd7998..7947e568e057 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -5,6 +5,7 @@ * Test code for seccomp bpf. */ +#include #include #define __have_siginfo_t 1 #define __have_sigval_t 1 @@ -14,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -1242,6 +1242,12 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS s390_regs # define SYSCALL_NUM gprs[2] # define SYSCALL_RET gprs[2] +#elif defined(__mips__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM regs[2] +# define SYSCALL_SYSCALL_NUM regs[4] +# define SYSCALL_RET regs[2] +# define SYSCALL_NUM_RET_SHARE_REG #else # error "Do not know how to find your architecture's registers and syscalls" #endif @@ -1249,7 +1255,7 @@ TEST_F(TRACE_poke, getpid_runs_normally) /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). */ -#if defined(__x86_64__) || defined(__i386__) +#if defined(__x86_64__) || defined(__i386__) || defined(__mips__) #define HAVE_GETREGS #endif @@ -1273,6 +1279,10 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) } #endif +#if defined(__mips__) + if (regs.SYSCALL_NUM == __NR_O32_Linux) + return regs.SYSCALL_SYSCALL_NUM; +#endif return regs.SYSCALL_NUM; } @@ -1297,6 +1307,13 @@ void change_syscall(struct __test_metadata *_metadata, { regs.SYSCALL_NUM = syscall; } +#elif defined(__mips__) + { + if (regs.SYSCALL_NUM == __NR_O32_Linux) + regs.SYSCALL_SYSCALL_NUM = syscall; + else + regs.SYSCALL_NUM = syscall; + } #elif defined(__arm__) # ifndef PTRACE_SET_SYSCALL @@ -1327,7 +1344,11 @@ void change_syscall(struct __test_metadata *_metadata, /* If syscall is skipped, change return value. */ if (syscall == -1) +#ifdef SYSCALL_NUM_RET_SHARE_REG + TH_LOG("Can't modify syscall return on this architecture"); +#else regs.SYSCALL_RET = 1; +#endif #ifdef HAVE_GETREGS ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s); @@ -1465,8 +1486,13 @@ TEST_F(TRACE_syscall, syscall_dropped) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); ASSERT_EQ(0, ret); +#ifdef SYSCALL_NUM_RET_SHARE_REG + /* gettid has been skipped */ + EXPECT_EQ(-1, syscall(__NR_gettid)); +#else /* gettid has been skipped and an altered return value stored. */ EXPECT_EQ(1, syscall(__NR_gettid)); +#endif EXPECT_NE(self->mytid, syscall(__NR_gettid)); } -- cgit From 5a614ec8a7cfe9098475fa1221b409fb7eec6054 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 10 May 2016 23:53:05 +0900 Subject: kselftests/ftrace: Detect tracefs mount point Currently ftracetest assumes tracing directory is located under $DEBUGFS/tracing. But it's possible to mount tracefs directly without debugfs. Signed-off-by: Namhyung Kim Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/ftracetest | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index da48812ab95e..4c6a0bf8ba79 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -88,7 +88,12 @@ parse_opts() { # opts # Parameters DEBUGFS_DIR=`grep debugfs /proc/mounts | cut -f2 -d' ' | head -1` -TRACING_DIR=$DEBUGFS_DIR/tracing +if [ -z "$DEBUGFS_DIR" ]; then + TRACING_DIR=`grep tracefs /proc/mounts | cut -f2 -d' ' | head -1` +else + TRACING_DIR=$DEBUGFS_DIR/tracing +fi + TOP_DIR=`absdir $0` TEST_DIR=$TOP_DIR/test.d TEST_CASES=`find_testcases $TEST_DIR` @@ -102,7 +107,7 @@ parse_opts $* [ $DEBUG -ne 0 ] && set -x # Verify parameters -if [ -z "$DEBUGFS_DIR" -o ! -d "$TRACING_DIR" ]; then +if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then errexit "No ftrace directory found" fi -- cgit From 2c6c3946c3955f96ae0d48fdac940903918207d8 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 10 May 2016 23:53:06 +0900 Subject: kselftests/ftrace: Add a test case for event pid filtering Check event is filtered by set_event_pid and options/event-fork. Signed-off-by: Namhyung Kim Signed-off-by: Shuah Khan --- .../selftests/ftrace/test.d/event/event-pid.tc | 72 ++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/event/event-pid.tc (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc new file mode 100644 index 000000000000..d4ab27b522f8 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc @@ -0,0 +1,72 @@ +#!/bin/sh +# description: event tracing - restricts events based on pid + +do_reset() { + echo > set_event + echo > set_event_pid + echo 0 > options/event-fork + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 +} + +if [ ! -f set_event -o ! -d events/sched ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f set_event_pid ]; then + echo "event pid filtering is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo 1 > events/sched/sched_switch/enable + +yield + +count=`cat trace | grep sched_switch | wc -l` +if [ $count -eq 0 ]; then + fail "sched_switch events are not recorded" +fi + +do_reset + +read mypid rest < /proc/self/stat + +echo $mypid > set_event_pid +echo 'sched:sched_switch' > set_event + +yield + +count=`cat trace | grep sched_switch | grep -v "pid=$mypid" | wc -l` +if [ $count -ne 0 ]; then + fail "sched_switch events from other task are recorded" +fi + +do_reset + +echo $mypid > set_event_pid +echo 1 > options/event-fork +echo 1 > events/sched/sched_switch/enable + +yield + +count=`cat trace | grep sched_switch | grep -v "pid=$mypid" | wc -l` +if [ $count -eq 0 ]; then + fail "sched_switch events from other task are not recorded" +fi + +do_reset + +exit 0 -- cgit From 6eab37daf0ec1077fd612ff27ab513db20f33767 Mon Sep 17 00:00:00 2001 From: Muhammad Falak R Wani Date: Fri, 13 May 2016 17:35:43 +0530 Subject: tools: testing: define the _GNU_SOURCE macro Add the macro _GNU_SOURCE, to fix CPU_ZERO and CPU_SET undefined compile errors. Signed-off-by: Muhammad Falak R Wani Signed-off-by: Shuah Khan --- tools/testing/selftests/intel_pstate/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh index bdaf37e92684..7868c106b8b1 100755 --- a/tools/testing/selftests/intel_pstate/run.sh +++ b/tools/testing/selftests/intel_pstate/run.sh @@ -32,7 +32,7 @@ EVALUATE_ONLY=0 max_cpus=$(($(nproc)-1)) # compile programs -gcc -o aperf aperf.c -lm +gcc aperf.c -Wall -D_GNU_SOURCE -o aperf -lm [ $? -ne 0 ] && echo "Problem compiling aperf.c." && exit 1 gcc -o msr msr.c -lm [ $? -ne 0 ] && echo "Problem compiling msr.c." && exit 1 -- cgit From e3b03b6c1a4f3b4564be08809f58584592621a0a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 5 May 2016 16:04:03 -0700 Subject: perf stat: Avoid fractional digits for integer scales When the scaling factor is a full integer don't display fractional digits. This avoids unnecessary .00 output for topdown metrics with scale factors. v2: Remove redundant check. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1462489447-31832-7-git-send-email-andi@firstfloor.org [ Rename 'round' to 'stat_round' as 'round' is defined in math.h, included by this patch, and this breaks the build on ubuntu 12.04 ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5645a8361de6..16a923c1633b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -66,6 +66,7 @@ #include #include #include +#include #define DEFAULT_SEPARATOR " " #define CNTR_NOT_SUPPORTED "" @@ -986,12 +987,12 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) const char *fmt; if (csv_output) { - fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; + fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; } else { if (big_num) - fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s"; + fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; else - fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s"; + fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; } aggr_printout(evsel, id, nr); @@ -1995,7 +1996,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_session *session) { - struct stat_round_event *round = &event->stat_round; + struct stat_round_event *stat_round = &event->stat_round; struct perf_evsel *counter; struct timespec tsh, *ts = NULL; const char **argv = session->header.env.cmdline_argv; @@ -2004,12 +2005,12 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, evlist__for_each(evsel_list, counter) perf_stat_process_counter(&stat_config, counter); - if (round->type == PERF_STAT_ROUND_TYPE__FINAL) - update_stats(&walltime_nsecs_stats, round->time); + if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) + update_stats(&walltime_nsecs_stats, stat_round->time); - if (stat_config.interval && round->time) { - tsh.tv_sec = round->time / NSECS_PER_SEC; - tsh.tv_nsec = round->time % NSECS_PER_SEC; + if (stat_config.interval && stat_round->time) { + tsh.tv_sec = stat_round->time / NSECS_PER_SEC; + tsh.tv_nsec = stat_round->time % NSECS_PER_SEC; ts = &tsh; } -- cgit From 6ae98ba611ed1c11ddc5645475bc03b46a3c04e7 Mon Sep 17 00:00:00 2001 From: He Kuang Date: Thu, 12 May 2016 08:43:11 +0000 Subject: perf symbols: Store vdso buildid unconditionally When unwinding callchains on a different machine, vdso info should be available so the unwind process won't be interrupted if address falls into vdso region. But in most cases, the addresses of sample events are not in vdso range, the buildid of a zero hit vdso won't be stored into perf.data. This patch stores vdso buildid regardless of whether the vdso is hit or not. Signed-off-by: He Kuang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Ekaterina Tumanova Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Kan Liang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Sukadev Bhattiprolu Cc: Wang Nan Link: http://lkml.kernel.org/r/1463042596-61703-3-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 2 +- tools/perf/util/dso.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index bff425e1232c..67e5966503b2 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -256,7 +256,7 @@ static int machine__write_buildid_table(struct machine *machine, int fd) size_t name_len; bool in_kernel = false; - if (!pos->hit) + if (!pos->hit && !dso__is_vdso(pos)) continue; if (dso__is_vdso(pos)) { diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 3357479082ca..75b75615e2f8 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -7,6 +7,7 @@ #include "auxtrace.h" #include "util.h" #include "debug.h" +#include "vdso.h" char dso__symtab_origin(const struct dso *dso) { @@ -1169,7 +1170,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) struct dso *pos; list_for_each_entry(pos, head, node) { - if (with_hits && !pos->hit) + if (with_hits && !pos->hit && !dso__is_vdso(pos)) continue; if (pos->has_build_id) { have_build_id = true; -- cgit From b0404be8d6186f9f3c23e2b5ff247e667be90652 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 13 May 2016 15:01:01 +0900 Subject: perf stat: Fix indentation of stalled backend cycle The commit 140aeadc1fb5 ("perf stat: Abstract stat metrics printing") changed how shadow metrics are printed, but it missed to update the width of the stalled backend cycles event to 7.2% like others. This resulted in misaligned output like below: Performance counter stats for 'pwd': 0.638313 task-clock (msec) # 0.567 CPUs utilized 0 context-switches # 0.000 K/sec 0 cpu-migrations # 0.000 K/sec 54 page-faults # 0.085 M/sec 885,600 cycles # 1.387 GHz 558,438 stalled-cycles-frontend # 63.06% frontend cycles idle 431,355 stalled-cycles-backend # 48.71% backend cycles idle 674,956 instructions # 0.76 insn per cycle # 0.83 stalled cycles per insn 130,380 branches # 204.257 M/sec branch-misses 0.001125426 seconds time elapsed Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Fixes: 140aeadc1fb5 ("perf stat: Abstract stat metrics printing") Link: http://lkml.kernel.org/r/1463119263-5569-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index fdb71961143e..61200fcac5ef 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -188,7 +188,7 @@ static void print_stalled_cycles_backend(int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio); + out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); } static void print_branch_misses(int cpu, -- cgit From daf4f4786e8af371048e72cb37ac05190e89198a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 13 May 2016 15:01:02 +0900 Subject: perf stat: Update runtime using cpu-clock event Currently only the task-clock event updates the runtime_nsec so it cannot show the metric when using cpu-clock events. However cpu clock works basically same as task-clock, so no need to not update the runtime IMHO. Before: # perf stat -a -e cpu-clock,context-switches,page-faults,cycles sleep 0.1 Performance counter stats for 'system wide': 1217.759506 cpu-clock (msec) 93 context-switches 61 page-faults 18,958,022 cycles 0.101393794 seconds time elapsed After: Performance counter stats for 'system wide': 1220.471884 cpu-clock (msec) # 12.013 CPUs utilized 118 context-switches # 0.097 K/sec 59 page-faults # 0.048 K/sec 17,941,247 cycles # 0.015 GHz 0.101594777 seconds time elapsed Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1463119263-5569-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 61200fcac5ef..aa9efe08762b 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -94,7 +94,8 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, { int ctx = evsel_context(counter); - if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) + if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || + perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) update_stats(&runtime_nsecs_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); @@ -444,7 +445,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ratio = total / avg; print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); - } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) { + } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) || + perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", avg / ratio); -- cgit From a1f3d56761df31f0ffeb215b974e26d5613e92a4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 13 May 2016 15:01:03 +0900 Subject: perf stat: Use cpu-clock event for cpu targets Currently 'perf stat' always counts task-clock event by default. But it's somewhat confusing for system-wide targets (especially with 'sleep N' as the 'sleep' task just sleeps and doesn't use cputime). Changing to cpu-clock event instead for that case makes more sense IMHO. Before: # perf stat -a sleep 0.1 Performance counter stats for 'system wide': 403.038603 task-clock (msec) # 4.001 CPUs utilized 150 context-switches # 0.372 K/sec 7 cpu-migrations # 0.017 K/sec 71 page-faults # 0.176 K/sec 23,705,169 cycles # 0.059 GHz 15,888,166 instructions # 0.67 insn per cycle 3,326,078 branches # 8.253 M/sec 87,643 branch-misses # 2.64% of all branches 0.100737009 seconds time elapsed # After: # perf stat -a sleep 0.1 Performance counter stats for 'system wide': 404.271182 cpu-clock (msec) # 4.000 CPUs utilized 143 context-switches # 0.354 K/sec 13 cpu-migrations # 0.032 K/sec 73 page-faults # 0.181 K/sec 22,119,220 cycles # 0.055 GHz 13,622,065 instructions # 0.62 insn per cycle 2,918,769 branches # 7.220 M/sec 85,033 branch-misses # 2.91% of all branches 0.101073089 seconds time elapsed # Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1463119263-5569-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 16a923c1633b..efdd23221c16 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1905,6 +1905,9 @@ static int add_default_attributes(void) } if (!evsel_list->nr_entries) { + if (target__has_cpu(&target)) + default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; + if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) return -1; if (pmu_have_event("cpu", "stalled-cycles-frontend")) { -- cgit From 0a77582f0407e7f9b5d775bebc31297a1b890be0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 15 May 2016 12:19:40 +0900 Subject: perf symbols: Introduce DSO__NAME_KALLSYMS and DSO__NAME_KCORE Instead of using a raw string, use DSO__NAME_KALLSYMS and DSO__NAME_KCORE macros for kallsyms and kcore. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Brendan Gregg Cc: Hemant Kumar Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20160515031935.4017.50971.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-cache.c | 8 ++++---- tools/perf/util/annotate.c | 2 +- tools/perf/util/machine.c | 2 +- tools/perf/util/symbol.c | 10 +++++----- tools/perf/util/symbol.h | 3 +++ 5 files changed, 14 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 632efc6b79a0..d75bded21fe0 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -119,8 +119,8 @@ static int build_id_cache__add_kcore(const char *filename, bool force) if (build_id_cache__kcore_buildid(from_dir, sbuildid) < 0) return -1; - scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", - buildid_dir, sbuildid); + scnprintf(to_dir, sizeof(to_dir), "%s/%s/%s", + buildid_dir, DSO__NAME_KCORE, sbuildid); if (!force && !build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) { @@ -131,8 +131,8 @@ static int build_id_cache__add_kcore(const char *filename, bool force) if (build_id_cache__kcore_dir(dir, sizeof(dir))) return -1; - scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s", - buildid_dir, sbuildid, dir); + scnprintf(to_dir, sizeof(to_dir), "%s/%s/%s/%s", + buildid_dir, DSO__NAME_KCORE, sbuildid, dir); if (mkdir_p(to_dir, 0755)) return -1; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 4db73d5a0dbc..b811924e5e1b 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1122,7 +1122,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize) } else if (dso__is_kcore(dso)) { goto fallback; } else if (readlink(symfs_filename, command, sizeof(command)) < 0 || - strstr(command, "[kernel.kallsyms]") || + strstr(command, DSO__NAME_KALLSYMS) || access(symfs_filename, R_OK)) { free(filename); fallback: diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 639a2903065e..18dd96bdde05 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -709,7 +709,7 @@ static struct dso *machine__get_kernel(struct machine *machine) if (machine__is_host(machine)) { vmlinux_name = symbol_conf.vmlinux_name; if (!vmlinux_name) - vmlinux_name = "[kernel.kallsyms]"; + vmlinux_name = DSO__NAME_KALLSYMS; kernel = machine__findnew_kernel(machine, vmlinux_name, "[kernel]", DSO_TYPE_KERNEL); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 7fb33304fb4e..2252b545ff43 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1662,8 +1662,8 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); - scnprintf(path, sizeof(path), "%s/[kernel.kcore]/%s", buildid_dir, - sbuild_id); + scnprintf(path, sizeof(path), "%s/%s/%s", buildid_dir, + DSO__NAME_KCORE, sbuild_id); /* Use /proc/kallsyms if possible */ if (is_host) { @@ -1699,8 +1699,8 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) if (!find_matching_kcore(map, path, sizeof(path))) return strdup(path); - scnprintf(path, sizeof(path), "%s/[kernel.kallsyms]/%s", - buildid_dir, sbuild_id); + scnprintf(path, sizeof(path), "%s/%s/%s", + buildid_dir, DSO__NAME_KALLSYMS, sbuild_id); if (access(path, F_OK)) { pr_err("No kallsyms or vmlinux with build-id %s was found\n", @@ -1769,7 +1769,7 @@ do_kallsyms: if (err > 0 && !dso__is_kcore(dso)) { dso->binary_type = DSO_BINARY_TYPE__KALLSYMS; - dso__set_long_name(dso, "[kernel.kallsyms]", false); + dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); map__fixup_start(map); map__fixup_end(map); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2b5e4ed76fcb..25f2fd672c2e 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -44,6 +44,9 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, #define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ #endif +#define DSO__NAME_KALLSYMS "[kernel.kallsyms]" +#define DSO__NAME_KCORE "[kernel.kcore]" + /** struct symbol - symtab entry * * @ignore - resolvable but tools ignore it (e.g. idle routines) -- cgit From a29d5c9b8167dbc21a7ca8c0302e3799f9063b4e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 16 May 2016 21:16:54 -0300 Subject: perf tools: Separate accounting of contexts and real addresses in a stack trace The perf_sample->ip_callchain->nr value includes all the entries in the ip_callchain->ip[] array, real addresses and PERF_CONTEXT_{KERNEL,USER,etc}, while what the user expects is that what is in the kernel.perf_event_max_stack sysctl or in the upcoming per event perf_event_attr.sample_max_stack knob be honoured in terms of IP addresses in the stack trace. So match the kernel support and validate chain->nr taking into account both kernel.perf_event_max_stack and kernel.perf_event_max_contexts_per_stack. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: David Ahern Cc: Frederic Weisbecker Cc: He Kuang Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/n/tip-mgx0jpzfdq4uq4abfa40byu0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 3 +++ tools/perf/util/machine.c | 26 +++++++++++++++++--------- tools/perf/util/util.c | 3 ++- tools/perf/util/util.h | 3 ++- 4 files changed, 24 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 797000842d40..15982cee5ef3 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -549,6 +549,9 @@ int main(int argc, const char **argv) if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0) sysctl_perf_event_max_stack = value; + if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0) + sysctl_perf_event_max_contexts_per_stack = value; + cmd = extract_argv0_path(argv[0]); if (!cmd) cmd = "perf-help"; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 18dd96bdde05..7ba9fadb68af 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1811,9 +1811,9 @@ static int thread__resolve_callchain_sample(struct thread *thread, { struct branch_stack *branch = sample->branch_stack; struct ip_callchain *chain = sample->callchain; - int chain_nr = min(max_stack, (int)chain->nr); + int chain_nr = chain->nr; u8 cpumode = PERF_RECORD_MISC_USER; - int i, j, err; + int i, j, err, nr_entries, nr_contexts; int skip_idx = -1; int first_call = 0; @@ -1828,7 +1828,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. */ - if (chain->nr < sysctl_perf_event_max_stack) + if (chain_nr < sysctl_perf_event_max_stack) skip_idx = arch_skip_callchain_idx(thread, chain); /* @@ -1889,12 +1889,8 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: - if (chain->nr > sysctl_perf_event_max_stack && (int)chain->nr > max_stack) { - pr_warning("corrupted callchain. skipping...\n"); - return 0; - } - - for (i = first_call; i < chain_nr; i++) { + for (i = first_call, nr_entries = 0, nr_contexts = 0; + i < chain_nr && nr_entries < max_stack; i++) { u64 ip; if (callchain_param.order == ORDER_CALLEE) @@ -1908,6 +1904,14 @@ check_calls: #endif ip = chain->ips[j]; + if (ip >= PERF_CONTEXT_MAX) { + if (++nr_contexts > sysctl_perf_event_max_contexts_per_stack) + goto out_corrupted_callchain; + } else { + if (++nr_entries > sysctl_perf_event_max_stack) + goto out_corrupted_callchain; + } + err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); if (err) @@ -1915,6 +1919,10 @@ check_calls: } return 0; + +out_corrupted_callchain: + pr_warning("corrupted callchain. skipping...\n"); + return 0; } static int unwind_entry(struct unwind_entry *entry, void *arg) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index eab077ad6ca9..23504ad5d6dd 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -33,7 +33,8 @@ struct callchain_param callchain_param = { unsigned int page_size; int cacheline_size; -unsigned int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; +int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; +int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK; bool test_attr__enabled; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 7651633a8dc7..1e8c3167b9fb 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -261,7 +261,8 @@ void sighandler_dump_stack(int sig); extern unsigned int page_size; extern int cacheline_size; -extern unsigned int sysctl_perf_event_max_stack; +extern int sysctl_perf_event_max_stack; +extern int sysctl_perf_event_max_contexts_per_stack; struct parse_tag { char tag; -- cgit From 2e51f26245701cb28f154552836b7807159088a8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 16 May 2016 15:31:07 -0500 Subject: objtool: Allow building with older libelf The switch to elf_getshdr{num,strndx} post-dates the oldest tool chain the kernel is supposed to be able to build with, so try to cope with such an environment. Signed-off-by: Jan Beulich Signed-off-by: Josh Poimboeuf Cc: # for v4.6 Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jan Beulich Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/732dae6872b7ff187d94f22bb699a12849d3fe04.1463430618.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- tools/objtool/Makefile | 4 ++++ tools/objtool/elf.h | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 6765c7e949f3..f094f3c4ed84 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -30,6 +30,10 @@ INCLUDES := -I$(srctree)/tools/include CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES) LDFLAGS += -lelf $(LIBSUBCMD) +# Allow old libelf to be used: +elfshdr := $(shell echo '\#include ' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr) +CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED) + AWK = awk export srctree OUTPUT CFLAGS ARCH AWK include $(srctree)/tools/build/Makefile.include diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index 7f3e00a2f907..aa1ff6596684 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -23,6 +23,11 @@ #include #include +#ifdef LIBELF_USE_DEPRECATED +# define elf_getshdrnum elf_getshnum +# define elf_getshdrstrndx elf_getshstrndx +#endif + struct section { struct list_head list; GElf_Shdr sh; -- cgit From 45e90056904b12d8dd74e0d2ea6dfd5e4394104d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 May 2016 11:52:26 -0300 Subject: perf machine: Do not bail out if not managing to read ref reloc symbol This means the user can't access /proc/kallsyms, for instance, because /proc/sys/kernel/kptr_restrict is set to 1. Instead leave the ref_reloc_sym as NULL and code using it will cope. This allows 'perf trace' to work on such systems for !root, the only issue would be when trying to resolve kernel symbols, which happens, for instance, in some libtracevent plugins. A warning for that case will be provided in the next patch in this series. Noticed in Ubuntu 16.04, that comes with kptr_restrict=1. Reported-by: Milian Wolff Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-knpu3z4iyp2dxpdfm798fac4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7ba9fadb68af..b277984aaa93 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1135,10 +1135,10 @@ int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); const char *name; - u64 addr = machine__get_running_kernel_start(machine, &name); + u64 addr; int ret; - if (!addr || kernel == NULL) + if (kernel == NULL) return -1; ret = __machine__create_kernel_maps(machine, kernel); @@ -1160,8 +1160,9 @@ int machine__create_kernel_maps(struct machine *machine) */ map_groups__fixup_end(&machine->kmaps); - if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, - addr)) { + addr = machine__get_running_kernel_start(machine, &name); + if (!addr) { + } else if (maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { machine__destroy_kernel_maps(machine); return -1; } -- cgit From caf8a0d0499792ac1b3f5b0b84e5890df0039cb6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 May 2016 11:56:24 -0300 Subject: perf trace: Warn when trying to resolve kernel addresses with kptr_restrict=1 Hook into the libtraceevent plugin kernel symbol resolver to warn the user that that can't happen with kptr_restrict=1. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-9gc412xx1gl0lvqj1d1xwlyb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 20 +++++++++++++++++++- tools/perf/util/machine.c | 1 + tools/perf/util/machine.h | 1 + 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6e5c325148e4..e488ac756f39 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1160,6 +1160,24 @@ static int trace__tool_process(struct perf_tool *tool, return trace__process_event(trace, machine, event, sample); } +static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) +{ + struct machine *machine = vmachine; + + if (machine->kptr_restrict_warned) + return NULL; + + if (symbol_conf.kptr_restrict) { + pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" + "Check /proc/sys/kernel/kptr_restrict.\n\n" + "Kernel samples will not be resolved.\n"); + machine->kptr_restrict_warned = true; + return NULL; + } + + return machine__resolve_kernel_addr(vmachine, addrp, modp); +} + static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) { int err = symbol__init(NULL); @@ -1171,7 +1189,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) if (trace->host == NULL) return -ENOMEM; - if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0) + if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0) return -errno; err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b277984aaa93..bdc33ce40bdc 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -43,6 +43,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->symbol_filter = NULL; machine->id_hdr_size = 0; + machine->kptr_restrict_warned = false; machine->comm_exec = false; machine->kernel_start = 0; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 83f46790c52f..41ac9cfd416b 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -28,6 +28,7 @@ struct machine { pid_t pid; u16 id_hdr_size; bool comm_exec; + bool kptr_restrict_warned; char *root_dir; struct rb_root threads; pthread_rwlock_t threads_lock; -- cgit From e77a07425f4391da2f08b0f9a09df4e70626204d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 May 2016 11:58:52 -0300 Subject: perf top: Use machine->kptr_restrict_warned Its now there, no need to have it too. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-y18oeou494uy11im7u9to0dx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 6 +++--- tools/perf/util/top.h | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 1793da585676..2a6cc254ad0c 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -732,7 +732,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (machine__resolve(machine, &al, sample) < 0) return; - if (!top->kptr_restrict_warned && + if (!machine->kptr_restrict_warned && symbol_conf.kptr_restrict && al.cpumode == PERF_RECORD_MISC_KERNEL) { ui__warning( @@ -743,7 +743,7 @@ static void perf_event__process_sample(struct perf_tool *tool, " modules" : ""); if (use_browser <= 0) sleep(5); - top->kptr_restrict_warned = true; + machine->kptr_restrict_warned = true; } if (al.sym == NULL) { @@ -759,7 +759,7 @@ static void perf_event__process_sample(struct perf_tool *tool, * --hide-kernel-symbols, even if the user specifies an * invalid --vmlinux ;-) */ - if (!top->kptr_restrict_warned && !top->vmlinux_warned && + if (!machine->kptr_restrict_warned && !top->vmlinux_warned && al.map == machine->vmlinux_maps[MAP__FUNCTION] && RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { if (symbol_conf.vmlinux_name) { diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index f92c37abb0a8..b2940c88734a 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -27,7 +27,6 @@ struct perf_top { int max_stack; bool hide_kernel_symbols, hide_user_symbols, zero; bool use_tui, use_stdio; - bool kptr_restrict_warned; bool vmlinux_warned; bool dump_symtab; struct hist_entry *sym_filter_entry; -- cgit From c008f78f9309a3be3548e11f48b8dfb08f2eb8fc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 17 May 2016 12:13:54 -0300 Subject: perf trace: Fix exit_group() formatting This doesn't return, so there is no raw_syscalls:sys_exit for it, add the ending ')', without any return value, since it is void. Reported-by: Ingo Molnar Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-vh2mii0g4qlveuc4joufbipu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e488ac756f39..a2f3aa09c7f4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1552,7 +1552,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (sc->is_exit) { if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) { trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); - fprintf(trace->output, "%-70s\n", ttrace->entry_str); + fprintf(trace->output, "%-70s)\n", ttrace->entry_str); } } else { ttrace->entry_pending = true; -- cgit From bf8bddbf1971d40549f33bc6f70623cf53bbfa2f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 May 2016 11:14:15 -0300 Subject: perf callchain: Stop validating callchains by the max_stack sysctl As thread__resolve_callchain_sample can be used for handling perf.data files, that could've been recorded with a large max_stack sysctl setting than what the system used for analysis has set. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: David Ahern Cc: Frederic Weisbecker Cc: He Kuang Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Cc: Zefan Li Link: http://lkml.kernel.org/n/tip-2995bt2g5yq2m05vga4kip6m@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index bdc33ce40bdc..205d27017361 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1771,11 +1771,6 @@ static int resolve_lbr_callchain_sample(struct thread *thread, */ int mix_chain_nr = i + 1 + lbr_nr + 1; - if (mix_chain_nr > (int)sysctl_perf_event_max_stack + PERF_MAX_BRANCH_DEPTH) { - pr_warning("corrupted callchain. skipping...\n"); - return 0; - } - for (j = 0; j < mix_chain_nr; j++) { if (callchain_param.order == ORDER_CALLEE) { if (j < i + 1) @@ -1815,7 +1810,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, struct ip_callchain *chain = sample->callchain; int chain_nr = chain->nr; u8 cpumode = PERF_RECORD_MISC_USER; - int i, j, err, nr_entries, nr_contexts; + int i, j, err, nr_entries; int skip_idx = -1; int first_call = 0; @@ -1830,8 +1825,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. */ - if (chain_nr < sysctl_perf_event_max_stack) - skip_idx = arch_skip_callchain_idx(thread, chain); + skip_idx = arch_skip_callchain_idx(thread, chain); /* * Add branches to call stack for easier browsing. This gives @@ -1891,7 +1885,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: - for (i = first_call, nr_entries = 0, nr_contexts = 0; + for (i = first_call, nr_entries = 0; i < chain_nr && nr_entries < max_stack; i++) { u64 ip; @@ -1906,13 +1900,8 @@ check_calls: #endif ip = chain->ips[j]; - if (ip >= PERF_CONTEXT_MAX) { - if (++nr_contexts > sysctl_perf_event_max_contexts_per_stack) - goto out_corrupted_callchain; - } else { - if (++nr_entries > sysctl_perf_event_max_stack) - goto out_corrupted_callchain; - } + if (ip < PERF_CONTEXT_MAX) + ++nr_entries; err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip); @@ -1921,10 +1910,6 @@ check_calls: } return 0; - -out_corrupted_callchain: - pr_warning("corrupted callchain. skipping...\n"); - return 0; } static int unwind_entry(struct unwind_entry *entry, void *arg) -- cgit From fe176085a4d45fb241c04beea872ecbb8bef6a55 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 May 2016 11:34:06 -0300 Subject: perf tools: Fix usage of max_stack sysctl We cannot limit processing stacks from the current value of the sysctl, as we may be processing perf.data files, possibly from other machines. Instead use the old PERF_MAX_STACK_DEPTH, the sysctl default, that can be overriden using --max-stack or equivalent. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: David Ahern Cc: Frederic Weisbecker Cc: He Kuang Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Wang Nan Cc: Zefan Li Fixes: 4cb93446c587 ("perf tools: Set the maximum allowed stack from /proc/sys/kernel/perf_event_max_stack") Link: http://lkml.kernel.org/n/tip-eqeutsr7n7wy0c36z24ytvii@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 +- tools/perf/Documentation/perf-script.txt | 2 +- tools/perf/Documentation/perf-trace.txt | 3 ++- tools/perf/builtin-report.c | 2 +- tools/perf/builtin-script.c | 2 -- tools/perf/builtin-trace.c | 2 +- tools/perf/util/db-export.c | 3 +-- tools/perf/util/scripting-engines/trace-event-perl.c | 3 +-- 8 files changed, 8 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index ebaf849e30ef..496d42cdf02b 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -248,7 +248,7 @@ OPTIONS Note that when using the --itrace option the synthesized callchain size will override this value if the synthesized callchain size is bigger. - Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise. + Default: 127 -G:: --inverted:: diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index a856a1095893..4fc44c75263f 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -267,7 +267,7 @@ include::itrace.txt[] Note that when using the --itrace option the synthesized callchain size will override this value if the synthesized callchain size is bigger. - Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise. + Default: 127 --ns:: Use 9 decimal places when displaying time (i.e. show the nanoseconds) diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 6afe20121bc0..1ab0782369b1 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -143,7 +143,8 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Implies '--call-graph dwarf' when --call-graph not present on the command line, on systems where DWARF unwinding was built in. - Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise. + Default: /proc/sys/kernel/perf_event_max_stack when present for + live sessions (without --input/-i), 127 otherwise. --min-stack:: Set the stack depth limit when parsing the callchain, anything diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 87d40e3c4078..9bc71c6a54f6 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -691,7 +691,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .ordered_events = true, .ordering_requires_timestamps = true, }, - .max_stack = sysctl_perf_event_max_stack, + .max_stack = PERF_MAX_STACK_DEPTH, .pretty_printing_style = "normal", .socket_filter = -1, }; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index efca81679bb3..0a83f4bacc7c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2067,8 +2067,6 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; - scripting_max_stack = sysctl_perf_event_max_stack; - setup_scripting(); argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index a2f3aa09c7f4..7401de71d097 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2905,7 +2905,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) mmap_pages_user_set = false; if (trace.max_stack == UINT_MAX) { - trace.max_stack = sysctl_perf_event_max_stack; + trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack; max_stack_user_set = false; } diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 8d96c80cc67e..c9a6dc173e74 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -298,8 +298,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, */ callchain_param.order = ORDER_CALLER; err = thread__resolve_callchain(thread, &callchain_cursor, evsel, - sample, NULL, NULL, - sysctl_perf_event_max_stack); + sample, NULL, NULL, PERF_MAX_STACK_DEPTH); if (err) { callchain_param.order = saved_order; return NULL; diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 62c7f6988e0e..5d1eb1ccd96c 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -264,8 +264,7 @@ static SV *perl_process_callchain(struct perf_sample *sample, goto exit; if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel, - sample, NULL, NULL, - sysctl_perf_event_max_stack) != 0) { + sample, NULL, NULL, scripting_max_stack) != 0) { pr_err("Failed to resolve callchain. Skipping\n"); goto exit; } -- cgit From 58c0400176b2cd35da43f3115fa94ca937483aca Mon Sep 17 00:00:00 2001 From: Chris Ryder Date: Thu, 19 May 2016 17:59:45 +0100 Subject: perf annotate: Fix identification of ARM blt and bls instructions The ARM blt and bls instructions are not correctly identified when parsing assembly because the list of recognised instructions must be sorted by name. Swap the ordering of blt and bls. Signed-off-by: Chris Ryder Acked-by: Pawel Moll Cc: Alexander Shishkin Cc: Mark Rutland Cc: Peter Zijlstra Cc: Will Deacon Link: http://lkml.kernel.org/r/560e196b7c79b7ff853caae13d8719a31479cb1a.1463676839.git.chris.ryder@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b811924e5e1b..3d9f2ca2ed2d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -372,8 +372,8 @@ static struct ins instructions[] = { { .name = "bgt", .ops = &jump_ops, }, { .name = "bhi", .ops = &jump_ops, }, { .name = "bl", .ops = &call_ops, }, - { .name = "blt", .ops = &jump_ops, }, { .name = "bls", .ops = &jump_ops, }, + { .name = "blt", .ops = &jump_ops, }, { .name = "blx", .ops = &call_ops, }, { .name = "bne", .ops = &jump_ops, }, #endif -- cgit From 7e4c1498130d7b6c26e6669839af4c7e321c9fec Mon Sep 17 00:00:00 2001 From: Chris Ryder Date: Thu, 19 May 2016 17:59:46 +0100 Subject: perf annotate: Sort list of recognised instructions Currently the list of instructions recognised by perf annotate has to be explicitly written in sorted order. This makes it easy to make mistakes when adding new instructions. Sort the list of instructions on first access. Signed-off-by: Chris Ryder Acked-by: Pawel Moll Cc: Alexander Shishkin Cc: Mark Rutland Cc: Peter Zijlstra Cc: Will Deacon Link: http://lkml.kernel.org/r/4268febaf32f47f322c166fb2fe98cfec7041e11.1463676839.git.chris.ryder@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 3d9f2ca2ed2d..7e5a1e8874ce 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -354,9 +354,6 @@ static struct ins_ops nop_ops = { .scnprintf = nop__scnprintf, }; -/* - * Must be sorted by name! - */ static struct ins instructions[] = { { .name = "add", .ops = &mov_ops, }, { .name = "addl", .ops = &mov_ops, }, @@ -449,18 +446,39 @@ static struct ins instructions[] = { { .name = "xbeginq", .ops = &jump_ops, }, }; -static int ins__cmp(const void *name, const void *insp) +static int ins__key_cmp(const void *name, const void *insp) { const struct ins *ins = insp; return strcmp(name, ins->name); } +static int ins__cmp(const void *a, const void *b) +{ + const struct ins *ia = a; + const struct ins *ib = b; + + return strcmp(ia->name, ib->name); +} + +static void ins__sort(void) +{ + const int nmemb = ARRAY_SIZE(instructions); + + qsort(instructions, nmemb, sizeof(struct ins), ins__cmp); +} + static struct ins *ins__find(const char *name) { const int nmemb = ARRAY_SIZE(instructions); + static bool sorted; + + if (!sorted) { + ins__sort(); + sorted = true; + } - return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__cmp); + return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__key_cmp); } int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym) -- cgit From caa36ed7ba82caa9f685dd04aa10d0bf87ea8b6a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 May 2016 19:00:27 -0300 Subject: perf trace: Only auto set call-graph to "dwarf" when syscalls are being traced When --min-stack or --max-stack is passwd but --no-syscalls is also in effect, there is no point in automatically setting '--call-graph dwarf'. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-pq922i7h9wef0pho1dqpttvn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7401de71d097..487c10401d46 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2910,7 +2910,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) } #ifdef HAVE_DWARF_UNWIND_SUPPORT - if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) + if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls) record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false); #endif -- cgit From a706670900073d236938d539d1109338d64b47bb Mon Sep 17 00:00:00 2001 From: He Kuang Date: Thu, 19 May 2016 11:47:37 +0000 Subject: perf tools: Set buildid dir under symfs when --symfs is provided This patch moves the reference of buildid dir to 'symfs/.debug' and skips the local buildid dir when '--symfs' is given, so that every single file opened by perf is relative to symfs directory now. Signed-off-by: He Kuang Acked-by: David Ahern Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ekaterina Tumanova Cc: Josh Poimboeuf Cc: Kan Liang Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Sukadev Bhattiprolu Cc: Wang Nan Link: http://lkml.kernel.org/r/1463658462-85131-2-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 5 +++-- tools/perf/builtin-diff.c | 5 +++-- tools/perf/builtin-report.c | 5 +++-- tools/perf/builtin-script.c | 5 +++-- tools/perf/builtin-timechart.c | 5 +++-- tools/perf/util/dso.c | 4 +--- tools/perf/util/symbol.c | 23 +++++++++++++++++++++++ tools/perf/util/symbol.h | 2 ++ 8 files changed, 41 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 814158393656..25c81734a950 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -324,8 +324,9 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing, "Skip symbols that cannot be annotated"), OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"), - OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", - "Look for files with symbols relative to this directory"), + OPT_CALLBACK(0, "symfs", NULL, "directory", + "Look for files with symbols relative to this directory", + symbol__config_symfs), OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, "Interleave source code with assembly code (default)"), OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 9ce354f469dc..f7645a42708e 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -812,8 +812,9 @@ static const struct option options[] = { OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' is reserved."), - OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", - "Look for files with symbols relative to this directory"), + OPT_CALLBACK(0, "symfs", NULL, "directory", + "Look for files with symbols relative to this directory", + symbol__config_symfs), OPT_UINTEGER('o', "order", &sort_compute, "Specify compute sorting."), OPT_CALLBACK(0, "percentage", NULL, "relative|absolute", "How to display percentage of filtered entries", parse_filter_percentage), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9bc71c6a54f6..a87cb338bdf1 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -770,8 +770,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "columns '.' is reserved."), OPT_BOOLEAN('U', "hide-unresolved", &symbol_conf.hide_unresolved, "Only display entries resolved to a symbol"), - OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", - "Look for files with symbols relative to this directory"), + OPT_CALLBACK(0, "symfs", NULL, "directory", + "Look for files with symbols relative to this directory", + symbol__config_symfs), OPT_STRING('C', "cpu", &report.cpu_list, "cpu", "list of cpus to profile"), OPT_BOOLEAN('I', "show-info", &report.show_full_info, diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 0a83f4bacc7c..e3ce2f34d3ad 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2010,8 +2010,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "file", "kallsyms pathname"), OPT_BOOLEAN('G', "hide-call-graph", &no_callchain, "When printing symbols do not display call chain"), - OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", - "Look for files with symbols relative to this directory"), + OPT_CALLBACK(0, "symfs", NULL, "directory", + "Look for files with symbols relative to this directory", + symbol__config_symfs), OPT_CALLBACK('F', "fields", NULL, "str", "comma separated output fields prepend with 'type:'. " "Valid types: hw,sw,trace,raw. " diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 40cc9bb3506c..733a55422d03 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1945,8 +1945,9 @@ int cmd_timechart(int argc, const char **argv, OPT_CALLBACK('p', "process", NULL, "process", "process selector. Pass a pid or process name.", parse_process), - OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", - "Look for files with symbols relative to this directory"), + OPT_CALLBACK(0, "symfs", NULL, "directory", + "Look for files with symbols relative to this directory", + symbol__config_symfs), OPT_INTEGER('n', "proc-num", &tchart.proc_num, "min. number of tasks to print"), OPT_BOOLEAN('t', "topology", &tchart.topology, diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 75b75615e2f8..5d286f5d7906 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -63,9 +63,7 @@ int dso__read_binary_type_filename(const struct dso *dso, } break; case DSO_BINARY_TYPE__BUILD_ID_CACHE: - /* skip the locally configured cache if a symfs is given */ - if (symbol_conf.symfs[0] || - (dso__build_id_filename(dso, filename, size) == NULL)) + if (dso__build_id_filename(dso, filename, size) == NULL) ret = -1; break; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 2252b545ff43..20f9cb32b703 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2033,3 +2033,26 @@ void symbol__exit(void) symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL; symbol_conf.initialized = false; } + +int symbol__config_symfs(const struct option *opt __maybe_unused, + const char *dir, int unset __maybe_unused) +{ + char *bf = NULL; + int ret; + + symbol_conf.symfs = strdup(dir); + if (symbol_conf.symfs == NULL) + return -ENOMEM; + + /* skip the locally configured cache if a symfs is given, and + * config buildid dir to symfs/.debug + */ + ret = asprintf(&bf, "%s/%s", dir, ".debug"); + if (ret < 0) + return -ENOMEM; + + set_buildid_dir(bf); + + free(bf); + return 0; +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 25f2fd672c2e..fa415347dbf9 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -290,6 +290,8 @@ bool symbol_type__is_a(char symbol_type, enum map_type map_type); bool symbol__restricted_filename(const char *filename, const char *restricted_filename); bool symbol__is_idle(struct symbol *sym); +int symbol__config_symfs(const struct option *opt __maybe_unused, + const char *dir, int unset __maybe_unused); int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, symbol_filter_t filter, -- cgit From b90dc17a5d14a881f9bb3b58edb3d71075d58afb Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 20 May 2016 16:38:23 +0000 Subject: perf evsel: Add overwrite attribute and check write_backward Add 'overwrite' attribute to evsel to mark whether this event is overwritable. The following commits will support syntax like: # perf record -e cycles/overwrite/ ... An overwritable evsel requires kernel support for the perf_event_attr.write_backward ring buffer feature. Add it to perf_missing_feature. Signed-off-by: Wang Nan Cc: He Kuang Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1463762315-155689-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 13 +++++++++++++ tools/perf/util/evsel.h | 1 + 2 files changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 964c7c3602c0..02c177d14c8d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -37,6 +37,7 @@ static struct { bool clockid; bool clockid_wrong; bool lbr_flags; + bool write_backward; } perf_missing_features; static clockid_t clockid; @@ -1376,6 +1377,8 @@ fallback_missing_features: if (perf_missing_features.lbr_flags) evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS | PERF_SAMPLE_BRANCH_NO_CYCLES); + if (perf_missing_features.write_backward) + evsel->attr.write_backward = false; retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; @@ -1438,6 +1441,12 @@ retry_open: err = -EINVAL; goto out_close; } + + if (evsel->overwrite && + perf_missing_features.write_backward) { + err = -EINVAL; + goto out_close; + } } } @@ -1500,6 +1509,10 @@ try_fallback: PERF_SAMPLE_BRANCH_NO_FLAGS))) { perf_missing_features.lbr_flags = true; goto fallback_missing_features; + } else if (!perf_missing_features.write_backward && + evsel->attr.write_backward) { + perf_missing_features.write_backward = true; + goto fallback_missing_features; } out_close: diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8a644fef452c..c1f10159804c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -112,6 +112,7 @@ struct perf_evsel { bool tracking; bool per_pkg; bool precise_max; + bool overwrite; /* parse modifier helper */ int exclude_GH; int nr_members; -- cgit From d4c6fb36ac2c82f8f0c05b04cf102dcdc2d5a14d Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 20 May 2016 16:38:24 +0000 Subject: perf evsel: Record fd into perf_mmap Add a fd field into struct perf_mmap so that perf can track the mmap fd. This feature will be used for toggling overwrite ring buffers. Signed-off-by: Wang Nan Cc: He Kuang Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1463762315-155689-3-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 6 ++++++ tools/perf/util/evlist.h | 1 + 2 files changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c4bfe11479a0..1a370db02a8c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -881,6 +881,7 @@ static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) if (evlist->mmap[idx].base != NULL) { munmap(evlist->mmap[idx].base, evlist->mmap_len); evlist->mmap[idx].base = NULL; + evlist->mmap[idx].fd = -1; atomic_set(&evlist->mmap[idx].refcnt, 0); } auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap); @@ -901,10 +902,14 @@ void perf_evlist__munmap(struct perf_evlist *evlist) static int perf_evlist__alloc_mmap(struct perf_evlist *evlist) { + int i; + evlist->nr_mmaps = cpu_map__nr(evlist->cpus); if (cpu_map__empty(evlist->cpus)) evlist->nr_mmaps = thread_map__nr(evlist->threads); evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); + for (i = 0; i < evlist->nr_mmaps; i++) + evlist->mmap[i].fd = -1; return evlist->mmap != NULL ? 0 : -ENOMEM; } @@ -941,6 +946,7 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, evlist->mmap[idx].base = NULL; return -1; } + evlist->mmap[idx].fd = fd; if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap, &mp->auxtrace_mp, evlist->mmap[idx].base, fd)) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 85d1b59802e8..0d165b1d8f77 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -28,6 +28,7 @@ struct record_opts; struct perf_mmap { void *base; int mask; + int fd; atomic_t refcnt; u64 prev; struct auxtrace_mmap auxtrace_mmap; -- cgit From f518b1607e128a8dcfa75f539864c1321c5a18ea Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:01:36 -0700 Subject: radix tree test suite: fix build Add an empty linux/init.h, and definitions for a few parts of the kernel API either in use now, or to be used in the near future. Start using the common definitions in tools/include/linux, although more work needs to be done here. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/linux/kernel.h | 12 ++++++++++-- tools/testing/radix-tree/linux/slab.h | 1 - tools/testing/radix-tree/linux/types.h | 7 ++----- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index ae013b0160ac..6d0cdf618084 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -7,19 +7,25 @@ #include #include +#include "../../include/linux/compiler.h" + #ifndef NULL #define NULL 0 #endif #define BUG_ON(expr) assert(!(expr)) +#define WARN_ON(expr) assert(!(expr)) #define __init #define __must_check #define panic(expr) #define printk printf #define __force -#define likely(c) (c) -#define unlikely(c) (c) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#define pr_debug printk + +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define cpu_relax() barrier() #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -28,6 +34,8 @@ (type *)( (char *)__mptr - offsetof(type, member) );}) #define min(a, b) ((a) < (b) ? (a) : (b)) +#define cond_resched() sched_yield() + static inline int in_interrupt(void) { return 0; diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h index 57282506c21d..6d5a34770fd4 100644 --- a/tools/testing/radix-tree/linux/slab.h +++ b/tools/testing/radix-tree/linux/slab.h @@ -3,7 +3,6 @@ #include -#define GFP_KERNEL 1 #define SLAB_HWCACHE_ALIGN 1 #define SLAB_PANIC 2 #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ diff --git a/tools/testing/radix-tree/linux/types.h b/tools/testing/radix-tree/linux/types.h index 72a9d85f6c76..faa0b6ff9ca8 100644 --- a/tools/testing/radix-tree/linux/types.h +++ b/tools/testing/radix-tree/linux/types.h @@ -1,15 +1,13 @@ #ifndef _TYPES_H #define _TYPES_H +#include "../../include/linux/types.h" + #define __rcu #define __read_mostly #define BITS_PER_LONG (sizeof(long) * 8) -struct list_head { - struct list_head *next, *prev; -}; - static inline void INIT_LIST_HEAD(struct list_head *list) { list->next = list; @@ -22,7 +20,6 @@ typedef struct { #define uninitialized_var(x) x = x -typedef unsigned gfp_t; #include #endif -- cgit From d42cb1a9fffa9dc760c13302f00cdec25106e2f1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:01:39 -0700 Subject: radix tree test suite: add tests for radix_tree_locate_item() Fairly simple tests; add various items to the tree, then make sure we can find them again. Also check that a pointer that we know isn't in the tree is not found. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/linux/kernel.h | 3 +++ tools/testing/radix-tree/main.c | 41 +++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 6d0cdf618084..76a88f35fdc4 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -9,6 +9,9 @@ #include "../../include/linux/compiler.h" +#define CONFIG_SHMEM +#define CONFIG_SWAP + #ifndef NULL #define NULL 0 #endif diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 0e83cad27a9f..71c5272443b1 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -232,10 +232,51 @@ void copy_tag_check(void) item_kill_tree(&tree); } +void __locate_check(struct radix_tree_root *tree, unsigned long index) +{ + struct item *item; + unsigned long index2; + + item_insert(tree, index); + item = item_lookup(tree, index); + index2 = radix_tree_locate_item(tree, item); + if (index != index2) { + printf("index %ld inserted; found %ld\n", + index, index2); + abort(); + } +} + +static void locate_check(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + unsigned long offset, index; + + for (offset = 0; offset < (1 << 3); offset++) { + for (index = 0; index < (1UL << 5); index++) { + __locate_check(&tree, index + offset); + } + if (radix_tree_locate_item(&tree, &tree) != -1) + abort(); + + item_kill_tree(&tree); + } + + if (radix_tree_locate_item(&tree, &tree) != -1) + abort(); + __locate_check(&tree, -1); + if (radix_tree_locate_item(&tree, &tree) != -1) + abort(); + item_kill_tree(&tree); +} + static void single_thread_tests(void) { int i; + printf("starting single_thread_tests: %d allocated\n", nr_allocated); + locate_check(); + printf("after locate_check: %d allocated\n", nr_allocated); tag_check(); printf("after tag_check: %d allocated\n", nr_allocated); gang_check(); -- cgit From 97d778b2de9213c7a7483dad0f533c1af9f0810f Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:01:42 -0700 Subject: radix tree test suite: allow testing other fan-out values The defines in regression2.c are already in radix-tree.h and duplicating them in the test case makes experimenting with other values for the fan-out harder than necessary. Allow the user of the radix tree to decide what the fan-out should be rather than fixing it to 8 for non-kernel uses. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 4 +--- tools/testing/radix-tree/linux/kernel.h | 2 ++ tools/testing/radix-tree/regression2.c | 7 ------- 3 files changed, 3 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 83f708e5db59..5ce5a1e0ecc5 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -70,10 +70,8 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) #define RADIX_TREE_MAX_TAGS 3 -#ifdef __KERNEL__ +#ifndef RADIX_TREE_MAP_SHIFT #define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) -#else -#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ #endif #define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 76a88f35fdc4..31fe2c77d7ae 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -12,6 +12,8 @@ #define CONFIG_SHMEM #define CONFIG_SWAP +#define RADIX_TREE_MAP_SHIFT 3 + #ifndef NULL #define NULL 0 #endif diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c index 5d2fa28cdca3..63bf347aaf33 100644 --- a/tools/testing/radix-tree/regression2.c +++ b/tools/testing/radix-tree/regression2.c @@ -51,13 +51,6 @@ #include "regression.h" -#ifdef __KERNEL__ -#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6) -#else -#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ -#endif - -#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) #define PAGECACHE_TAG_DIRTY 0 #define PAGECACHE_TAG_WRITEBACK 1 #define PAGECACHE_TAG_TOWRITE 2 -- cgit From aa1d62d8530d5adf158dd633d360108466f93fcd Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:01:45 -0700 Subject: radix tree test suite: keep regression test runs short Currently the full suite of regression tests take upwards of 30 minutes to run on my development machine. The vast majority of this time is taken by the big_gang_check() and copy_tag_check() tests, which each run their tests through thousands of iterations...does this have value? Without big_gang_check() and copy_tag_check(), the test suite runs in around 15 seconds on my box. Honestly the first time I ever ran through the entire test suite was to gather the timings for this email - it simply takes too long to be useful on a normal basis. Instead, hide the excessive iterations through big_gang_check() and copy_tag_check() tests behind an '-l' flag (for "long run") in case they are still useful, but allow the regression test suite to complete in a reasonable amount of time. We still run each of these tests a few times (3 at present) to try and keep the test coverage. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/main.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 71c5272443b1..122c8b9be17e 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -61,11 +61,11 @@ void __big_gang_check(void) } while (!wrapped); } -void big_gang_check(void) +void big_gang_check(bool long_run) { int i; - for (i = 0; i < 1000; i++) { + for (i = 0; i < (long_run ? 1000 : 3); i++) { __big_gang_check(); srand(time(0)); printf("%d ", i); @@ -270,7 +270,7 @@ static void locate_check(void) item_kill_tree(&tree); } -static void single_thread_tests(void) +static void single_thread_tests(bool long_run) { int i; @@ -285,9 +285,9 @@ static void single_thread_tests(void) printf("after add_and_check: %d allocated\n", nr_allocated); dynamic_height_check(); printf("after dynamic_height_check: %d allocated\n", nr_allocated); - big_gang_check(); + big_gang_check(long_run); printf("after big_gang_check: %d allocated\n", nr_allocated); - for (i = 0; i < 2000; i++) { + for (i = 0; i < (long_run ? 2000 : 3); i++) { copy_tag_check(); printf("%d ", i); fflush(stdout); @@ -295,15 +295,23 @@ static void single_thread_tests(void) printf("after copy_tag_check: %d allocated\n", nr_allocated); } -int main(void) +int main(int argc, char **argv) { + bool long_run = false; + int opt; + + while ((opt = getopt(argc, argv, "l")) != -1) { + if (opt == 'l') + long_run = true; + } + rcu_register_thread(); radix_tree_init(); regression1_test(); regression2_test(); regression3_test(); - single_thread_tests(); + single_thread_tests(long_run); sleep(1); printf("after sleep(1): %d allocated\n", nr_allocated); -- cgit From 7f308671c79899c1b4e275867d3647f64e896e78 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:01:48 -0700 Subject: radix tree test suite: rebuild when headers change When we make changes to radix-tree.h in the regular kernel source (include/linux/radix-tree.h), we really want our test code to be rebuilt. We also include a few other headers from tools/include and probably want to rebuild if these have been changed. Update the makefile so that all of our objects will be rebuilt when any of the headers we depend on are changed. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 604212db9d4b..43febba864bd 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -13,7 +13,7 @@ main: $(OFILES) clean: $(RM) -f $(TARGETS) *.o radix-tree.c -$(OFILES): *.h */*.h +$(OFILES): *.h */*.h ../../../include/linux/radix-tree.h ../../include/linux/*.h radix-tree.c: ../../../lib/radix-tree.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ -- cgit From 57578c2ea2cb2e0d362a9212ac83cf90221d4883 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:01:54 -0700 Subject: raxix-tree: introduce CONFIG_RADIX_TREE_MULTIORDER I've been receiving increasingly concerned notes from 0day about how much my recent changes have been bloating the radix tree. Make it happier by only including multiorder support if CONFIG_TRANSPARENT_HUGEPAGES is set. This is an independent Kconfig option, so other radix tree users can also set it if they have a need. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig | 3 +++ lib/radix-tree.c | 26 ++++++++++++++++++-------- mm/Kconfig | 1 + tools/testing/radix-tree/linux/kernel.h | 1 + 4 files changed, 23 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/lib/Kconfig b/lib/Kconfig index 61d55bd0ed89..d79909dc01ec 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -362,6 +362,9 @@ config INTERVAL_TREE for more information. +config RADIX_TREE_MULTIORDER + bool + config ASSOCIATIVE_ARRAY bool help diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 1624c4117961..799f341977d0 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -484,6 +484,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, slot = node->slots[offset]; } +#ifdef CONFIG_RADIX_TREE_MULTIORDER /* Insert pointers to the canonical entry */ if ((shift - order) > 0) { int i, n = 1 << (shift - order); @@ -499,6 +500,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, node->count++; } } +#endif if (nodep) *nodep = node; @@ -1469,6 +1471,20 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, return deleted; } +static inline void delete_sibling_entries(struct radix_tree_node *node, + void *ptr, unsigned offset) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + int i; + for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { + if (node->slots[offset + i] != ptr) + break; + node->slots[offset + i] = NULL; + node->count--; + } +#endif +} + /** * radix_tree_delete_item - delete an item from a radix tree * @root: radix tree root @@ -1484,7 +1500,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, unsigned long index, void *item) { struct radix_tree_node *node; - unsigned int offset, i; + unsigned int offset; void **slot; void *entry; int tag; @@ -1513,13 +1529,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root, radix_tree_tag_clear(root, index, tag); } - /* Delete any sibling slots pointing to this slot */ - for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) { - if (node->slots[offset + i] != ptr_to_indirect(slot)) - break; - node->slots[offset + i] = NULL; - node->count--; - } + delete_sibling_entries(node, ptr_to_indirect(slot), offset); node->slots[offset] = NULL; node->count--; diff --git a/mm/Kconfig b/mm/Kconfig index 1a6a28ebcb8b..2664c118b5d2 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -404,6 +404,7 @@ config TRANSPARENT_HUGEPAGE bool "Transparent Hugepage Support" depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE select COMPACTION + select RADIX_TREE_MULTIORDER help Transparent Hugepages allows the kernel to use huge pages and huge tlb transparently to the applications whenever possible. diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 31fe2c77d7ae..8ea0ed450810 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -9,6 +9,7 @@ #include "../../include/linux/compiler.h" +#define CONFIG_RADIX_TREE_MULTIORDER #define CONFIG_SHMEM #define CONFIG_SWAP -- cgit From 4f3755d1ae3cd856a5c7da3dea12cced8dc51fbf Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:14 -0700 Subject: radix tree test suite: start adding multiorder tests Test suite infrastructure for working with multiorder entries. The test itself is pretty basic: Add an entry, check that all expected indices return that entry and that indices around that entry don't return an entry. Then delete the entry and check no index returns that entry. Tests a few edge conditions including the multiorder entry at index 0 and at a higher index. Also tests deleting through an alias as well as through the canonical index. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/Makefile | 2 +- tools/testing/radix-tree/main.c | 2 ++ tools/testing/radix-tree/multiorder.c | 58 +++++++++++++++++++++++++++++++++++ tools/testing/radix-tree/test.c | 13 ++++++-- tools/testing/radix-tree/test.h | 6 +++- 5 files changed, 76 insertions(+), 5 deletions(-) create mode 100644 tools/testing/radix-tree/multiorder.c (limited to 'tools') diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 43febba864bd..3b530467148e 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -3,7 +3,7 @@ CFLAGS += -I. -g -Wall -D_LGPL_SOURCE LDFLAGS += -lpthread -lurcu TARGETS = main OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \ - regression1.o regression2.o regression3.o + regression1.o regression2.o regression3.o multiorder.o targets: $(TARGETS) diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 122c8b9be17e..b6a700b00cce 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -275,6 +275,8 @@ static void single_thread_tests(bool long_run) int i; printf("starting single_thread_tests: %d allocated\n", nr_allocated); + multiorder_checks(); + printf("after multiorder_check: %d allocated\n", nr_allocated); locate_check(); printf("after locate_check: %d allocated\n", nr_allocated); tag_check(); diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c new file mode 100644 index 000000000000..cfe718c78eb6 --- /dev/null +++ b/tools/testing/radix-tree/multiorder.c @@ -0,0 +1,58 @@ +/* + * multiorder.c: Multi-order radix tree entry testing + * Copyright (c) 2016 Intel Corporation + * Author: Ross Zwisler + * Author: Matthew Wilcox + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include +#include +#include + +#include "test.h" + +static void multiorder_check(unsigned long index, int order) +{ + unsigned long i; + unsigned long min = index & ~((1UL << order) - 1); + unsigned long max = min + (1UL << order); + RADIX_TREE(tree, GFP_KERNEL); + + printf("Multiorder index %ld, order %d\n", index, order); + + assert(item_insert_order(&tree, index, order) == 0); + + for (i = min; i < max; i++) { + struct item *item = item_lookup(&tree, i); + assert(item != 0); + assert(item->index == index); + } + for (i = 0; i < min; i++) + item_check_absent(&tree, i); + for (i = max; i < 2*max; i++) + item_check_absent(&tree, i); + + assert(item_delete(&tree, index) != 0); + + for (i = 0; i < 2*max; i++) + item_check_absent(&tree, i); +} + +void multiorder_checks(void) +{ + int i; + + for (i = 0; i < 20; i++) { + multiorder_check(200, i); + multiorder_check(0, i); + multiorder_check((1UL << i) + 1, i); + } +} diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 2bebf34cdc27..da54f11e8ba7 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -24,14 +24,21 @@ int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag) return radix_tree_tag_get(root, index, tag); } -int __item_insert(struct radix_tree_root *root, struct item *item) +int __item_insert(struct radix_tree_root *root, struct item *item, + unsigned order) { - return radix_tree_insert(root, item->index, item); + return __radix_tree_insert(root, item->index, order, item); } int item_insert(struct radix_tree_root *root, unsigned long index) { - return __item_insert(root, item_create(index)); + return __item_insert(root, item_create(index), 0); +} + +int item_insert_order(struct radix_tree_root *root, unsigned long index, + unsigned order) +{ + return __item_insert(root, item_create(index), order); } int item_delete(struct radix_tree_root *root, unsigned long index) diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 4e1d95faaa94..53cb595db44a 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -8,8 +8,11 @@ struct item { }; struct item *item_create(unsigned long index); -int __item_insert(struct radix_tree_root *root, struct item *item); +int __item_insert(struct radix_tree_root *root, struct item *item, + unsigned order); int item_insert(struct radix_tree_root *root, unsigned long index); +int item_insert_order(struct radix_tree_root *root, unsigned long index, + unsigned order); int item_delete(struct radix_tree_root *root, unsigned long index); struct item *item_lookup(struct radix_tree_root *root, unsigned long index); @@ -23,6 +26,7 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start, void item_kill_tree(struct radix_tree_root *root); void tag_check(void); +void multiorder_checks(void); struct item * item_tag_set(struct radix_tree_root *root, unsigned long index, int tag); -- cgit From afe0e395b6d1817fa5393f1ad6fcbf71406b016d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:17 -0700 Subject: radix-tree: fix several shrinking bugs with multiorder entries Setting the indirect bit on the user data entry used to be unambiguous because the tree walking code knew not to expect internal nodes in the last level of the tree. Multiorder entries can appear at any level of the tree, and a leaf with the indirect bit set is indistinguishable from a pointer to a node. Introduce a special entry (RADIX_TREE_RETRY) which is neither a valid user entry, nor a valid pointer to a node. The radix_tree_deref_retry() function continues to work the same way, but tree walking code can distinguish it from a pointer to a node. Also fix the condition for setting slot->parent to NULL; it does not matter what height the tree is, it only matters whether slot is an indirect pointer. Move this code above the comment which is referring to the assignment to root->rnode. Also fix the condition for preventing the tree from shrinking to a single entry if it's a multiorder entry. Add a test-case to the test suite that checks that the tree goes back down to its original height after an item is inserted & deleted from a higher index in the tree. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 23 +++++++++++---------- tools/testing/radix-tree/multiorder.c | 39 +++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f13ddbba8ace..a1ba41730071 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -80,6 +80,8 @@ static inline void *indirect_to_ptr(void *ptr) return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } +#define RADIX_TREE_RETRY ptr_to_indirect(NULL) + #ifdef CONFIG_RADIX_TREE_MULTIORDER /* Sibling slots point directly to another slot in the same node */ static inline bool is_sibling_entry(struct radix_tree_node *parent, void *node) @@ -1443,6 +1445,14 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) slot = to_free->slots[0]; if (!slot) break; + if (!radix_tree_is_indirect_ptr(slot) && (root->height > 1)) + break; + + if (radix_tree_is_indirect_ptr(slot)) { + slot = indirect_to_ptr(slot); + slot->parent = NULL; + slot = ptr_to_indirect(slot); + } /* * We don't need rcu_assign_pointer(), since we are simply @@ -1451,14 +1461,6 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * (to_free->slots[0]), it will be safe to dereference the new * one (root->rnode) as far as dependent read barriers go. */ - if (root->height > 1) { - if (!radix_tree_is_indirect_ptr(slot)) - break; - - slot = indirect_to_ptr(slot); - slot->parent = NULL; - slot = ptr_to_indirect(slot); - } root->rnode = slot; root->height--; @@ -1480,9 +1482,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) * also results in a stale slot). So tag the slot as indirect * to force callers to retry. */ - if (root->height == 0) - *((unsigned long *)&to_free->slots[0]) |= - RADIX_TREE_INDIRECT_PTR; + if (!radix_tree_is_indirect_ptr(slot)) + to_free->slots[0] = RADIX_TREE_RETRY; radix_tree_node_free(to_free); } diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index cfe718c78eb6..71f34a047002 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -46,6 +46,41 @@ static void multiorder_check(unsigned long index, int order) item_check_absent(&tree, i); } +static void multiorder_shrink(unsigned long index, int order) +{ + unsigned long i; + unsigned long max = 1 << order; + RADIX_TREE(tree, GFP_KERNEL); + struct radix_tree_node *node; + + printf("Multiorder shrink index %ld, order %d\n", index, order); + + assert(item_insert_order(&tree, 0, order) == 0); + + node = tree.rnode; + + assert(item_insert(&tree, index) == 0); + assert(node != tree.rnode); + + assert(item_delete(&tree, index) != 0); + assert(node == tree.rnode); + + for (i = 0; i < max; i++) { + struct item *item = item_lookup(&tree, i); + assert(item != 0); + assert(item->index == 0); + } + for (i = max; i < 2*max; i++) + item_check_absent(&tree, i); + + if (!item_delete(&tree, 0)) { + printf("failed to delete index %ld (order %d)\n", index, order); abort(); + } + + for (i = 0; i < 2*max; i++) + item_check_absent(&tree, i); +} + void multiorder_checks(void) { int i; @@ -55,4 +90,8 @@ void multiorder_checks(void) multiorder_check(0, i); multiorder_check((1UL << i) + 1, i); } + + for (i = 0; i < 15; i++) + multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i); + } -- cgit From 7b60e9ad59a31dd98c2f7ef841e2882c2b0e0f3b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:23 -0700 Subject: radix-tree: fix multiorder BUG_ON in radix_tree_insert These BUG_ON tests are to ensure that all the tags are clear when inserting a new entry. If we insert a multiorder entry, we'll end up looking at the tags for a different node, and so the BUG_ON can end up triggering spuriously. Also, we now have three tags, not two, so check all three are clear, and check all the root tags with a single call to BUG_ON since the bits are stored contiguously. Include a test-case to ensure this problem does not reoccur. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 14 ++++++++++---- tools/testing/radix-tree/multiorder.c | 12 ++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f14ada9830ca..ff460423ff4b 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -165,6 +165,11 @@ static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag) return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT)); } +static inline unsigned root_tags_get(struct radix_tree_root *root) +{ + return (__force unsigned)root->gfp_mask >> __GFP_BITS_SHIFT; +} + /* * Returns 1 if any slot in the node has this tag set. * Otherwise returns 0. @@ -604,12 +609,13 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, rcu_assign_pointer(*slot, item); if (node) { + unsigned offset = get_slot_offset(node, slot); node->count++; - BUG_ON(tag_get(node, 0, index & RADIX_TREE_MAP_MASK)); - BUG_ON(tag_get(node, 1, index & RADIX_TREE_MAP_MASK)); + BUG_ON(tag_get(node, 0, offset)); + BUG_ON(tag_get(node, 1, offset)); + BUG_ON(tag_get(node, 2, offset)); } else { - BUG_ON(root_tag_get(root, 0)); - BUG_ON(root_tag_get(root, 1)); + BUG_ON(root_tags_get(root)); } return 0; diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 71f34a047002..0a311a5f39de 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -81,6 +81,17 @@ static void multiorder_shrink(unsigned long index, int order) item_check_absent(&tree, i); } +static void multiorder_insert_bug(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + + item_insert(&tree, 0); + radix_tree_tag_set(&tree, 0, 0); + item_insert_order(&tree, 3 << 6, 6); + + item_kill_tree(&tree); +} + void multiorder_checks(void) { int i; @@ -94,4 +105,5 @@ void multiorder_checks(void) for (i = 0; i < 15; i++) multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i); + multiorder_insert_bug(); } -- cgit From 21ef533931f73a8e963a6107aa5ec51b192f28be Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:26 -0700 Subject: radix-tree: add support for multi-order iterating This enables the macros radix_tree_for_each_slot() and friends to be used with multi-order entries. The way that this works is that we treat all entries in a given slots[] array as a single chunk. If the index given to radix_tree_next_chunk() happens to point us to a sibling entry, we will back up iter->index so that it points to the canonical entry, and that will be the place where we start our iteration. As we're processing a chunk in radix_tree_next_slot(), we process canonical entries, skip over sibling entries, and restart the chunk lookup if we find a non-sibling indirect pointer. This drops back to the radix_tree_next_chunk() code, which will re-walk the tree and look for another chunk. This allows us to properly handle multi-order entries mixed with other entries that are at various heights in the radix tree. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 69 +++++++++++++++++++++++---- lib/radix-tree.c | 66 ++++++++++++++----------- tools/testing/radix-tree/generated/autoconf.h | 3 ++ tools/testing/radix-tree/linux/kernel.h | 5 +- 4 files changed, 102 insertions(+), 41 deletions(-) create mode 100644 tools/testing/radix-tree/generated/autoconf.h (limited to 'tools') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index e1512a607709..8558d52e1c7b 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -330,8 +330,9 @@ static inline void radix_tree_preload_end(void) * struct radix_tree_iter - radix tree iterator state * * @index: index of current slot - * @next_index: next-to-last index for this chunk + * @next_index: one beyond the last index for this chunk * @tags: bit-mask for tag-iterating + * @shift: shift for the node that holds our slots * * This radix tree iterator works in terms of "chunks" of slots. A chunk is a * subinterval of slots contained within one radix tree leaf node. It is @@ -344,8 +345,20 @@ struct radix_tree_iter { unsigned long index; unsigned long next_index; unsigned long tags; +#ifdef CONFIG_RADIX_TREE_MULTIORDER + unsigned int shift; +#endif }; +static inline unsigned int iter_shift(struct radix_tree_iter *iter) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + return iter->shift; +#else + return 0; +#endif +} + #define RADIX_TREE_ITER_TAG_MASK 0x00FF /* tag index in lower byte */ #define RADIX_TREE_ITER_TAGGED 0x0100 /* lookup tagged slots */ #define RADIX_TREE_ITER_CONTIG 0x0200 /* stop at first hole */ @@ -405,6 +418,12 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) return NULL; } +static inline unsigned long +__radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots) +{ + return iter->index + (slots << iter_shift(iter)); +} + /** * radix_tree_iter_next - resume iterating when the chunk may be invalid * @iter: iterator state @@ -416,7 +435,7 @@ void **radix_tree_iter_retry(struct radix_tree_iter *iter) static inline __must_check void **radix_tree_iter_next(struct radix_tree_iter *iter) { - iter->next_index = iter->index + 1; + iter->next_index = __radix_tree_iter_add(iter, 1); iter->tags = 0; return NULL; } @@ -430,7 +449,12 @@ void **radix_tree_iter_next(struct radix_tree_iter *iter) static __always_inline long radix_tree_chunk_size(struct radix_tree_iter *iter) { - return iter->next_index - iter->index; + return (iter->next_index - iter->index) >> iter_shift(iter); +} + +static inline void *indirect_to_ptr(void *ptr) +{ + return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } /** @@ -448,24 +472,51 @@ static __always_inline void ** radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) { if (flags & RADIX_TREE_ITER_TAGGED) { + void *canon = slot; + iter->tags >>= 1; + if (unlikely(!iter->tags)) + return NULL; + while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && + radix_tree_is_indirect_ptr(slot[1])) { + if (indirect_to_ptr(slot[1]) == canon) { + iter->tags >>= 1; + iter->index = __radix_tree_iter_add(iter, 1); + slot++; + continue; + } + iter->next_index = __radix_tree_iter_add(iter, 1); + return NULL; + } if (likely(iter->tags & 1ul)) { - iter->index++; + iter->index = __radix_tree_iter_add(iter, 1); return slot + 1; } - if (!(flags & RADIX_TREE_ITER_CONTIG) && likely(iter->tags)) { + if (!(flags & RADIX_TREE_ITER_CONTIG)) { unsigned offset = __ffs(iter->tags); iter->tags >>= offset; - iter->index += offset + 1; + iter->index = __radix_tree_iter_add(iter, offset + 1); return slot + offset + 1; } } else { - long size = radix_tree_chunk_size(iter); + long count = radix_tree_chunk_size(iter); + void *canon = slot; - while (--size > 0) { + while (--count > 0) { slot++; - iter->index++; + iter->index = __radix_tree_iter_add(iter, 1); + + if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && + radix_tree_is_indirect_ptr(*slot)) { + if (indirect_to_ptr(*slot) == canon) + continue; + else { + iter->next_index = iter->index; + break; + } + } + if (likely(*slot)) return slot; if (flags & RADIX_TREE_ITER_CONTIG) { diff --git a/lib/radix-tree.c b/lib/radix-tree.c index ff460423ff4b..a4da86e40def 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -75,11 +75,6 @@ static inline void *ptr_to_indirect(void *ptr) return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR); } -static inline void *indirect_to_ptr(void *ptr) -{ - return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); -} - #define RADIX_TREE_RETRY ptr_to_indirect(NULL) #ifdef CONFIG_RADIX_TREE_MULTIORDER @@ -885,6 +880,14 @@ int radix_tree_tag_get(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_tag_get); +static inline void __set_iter_shift(struct radix_tree_iter *iter, + unsigned int shift) +{ +#ifdef CONFIG_RADIX_TREE_MULTIORDER + iter->shift = shift; +#endif +} + /** * radix_tree_next_chunk - find next chunk of slots for iteration * @@ -898,7 +901,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, { unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK; struct radix_tree_node *rnode, *node; - unsigned long index, offset, height; + unsigned long index, offset, maxindex; if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) return NULL; @@ -916,33 +919,39 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if (!index && iter->index) return NULL; - rnode = rcu_dereference_raw(root->rnode); + restart: + shift = radix_tree_load_root(root, &rnode, &maxindex); + if (index > maxindex) + return NULL; + if (radix_tree_is_indirect_ptr(rnode)) { rnode = indirect_to_ptr(rnode); - } else if (rnode && !index) { + } else if (rnode) { /* Single-slot tree */ - iter->index = 0; - iter->next_index = 1; + iter->index = index; + iter->next_index = maxindex + 1; iter->tags = 1; + __set_iter_shift(iter, shift); return (void **)&root->rnode; } else return NULL; -restart: - height = rnode->path & RADIX_TREE_HEIGHT_MASK; - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; + shift -= RADIX_TREE_MAP_SHIFT; offset = index >> shift; - /* Index outside of the tree */ - if (offset >= RADIX_TREE_MAP_SIZE) - return NULL; - node = rnode; while (1) { struct radix_tree_node *slot; + unsigned new_off = radix_tree_descend(node, &slot, offset); + + if (new_off < offset) { + offset = new_off; + index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); + index |= offset << shift; + } + if ((flags & RADIX_TREE_ITER_TAGGED) ? - !test_bit(offset, node->tags[tag]) : - !node->slots[offset]) { + !tag_get(node, tag, offset) : !slot) { /* Hole detected */ if (flags & RADIX_TREE_ITER_CONTIG) return NULL; @@ -954,7 +963,10 @@ restart: offset + 1); else while (++offset < RADIX_TREE_MAP_SIZE) { - if (node->slots[offset]) + void *slot = node->slots[offset]; + if (is_sibling_entry(node, slot)) + continue; + if (slot) break; } index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); @@ -964,25 +976,23 @@ restart: return NULL; if (offset == RADIX_TREE_MAP_SIZE) goto restart; + slot = rcu_dereference_raw(node->slots[offset]); } - /* This is leaf-node */ - if (!shift) - break; - - slot = rcu_dereference_raw(node->slots[offset]); - if (slot == NULL) + if ((slot == NULL) || (slot == RADIX_TREE_RETRY)) goto restart; if (!radix_tree_is_indirect_ptr(slot)) break; + node = indirect_to_ptr(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } /* Update the iterator state */ - iter->index = index; - iter->next_index = (index | RADIX_TREE_MAP_MASK) + 1; + iter->index = index & ~((1 << shift) - 1); + iter->next_index = (index | ((RADIX_TREE_MAP_SIZE << shift) - 1)) + 1; + __set_iter_shift(iter, shift); /* Construct iter->tags bit-mask from node->tags[tag] array */ if (flags & RADIX_TREE_ITER_TAGGED) { diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h new file mode 100644 index 000000000000..ad18cf5a2a3a --- /dev/null +++ b/tools/testing/radix-tree/generated/autoconf.h @@ -0,0 +1,3 @@ +#define CONFIG_RADIX_TREE_MULTIORDER 1 +#define CONFIG_SHMEM 1 +#define CONFIG_SWAP 1 diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 8ea0ed450810..be98a47b4e1b 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -8,10 +8,7 @@ #include #include "../../include/linux/compiler.h" - -#define CONFIG_RADIX_TREE_MULTIORDER -#define CONFIG_SHMEM -#define CONFIG_SWAP +#include "../../../include/linux/kconfig.h" #define RADIX_TREE_MAP_SHIFT 3 -- cgit From 643b57d0a9bd4c93625a2f5da4cebc3ceb402b9b Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:29 -0700 Subject: radix tree test suite: multi-order iteration test Add a unit test to verify that we can iterate over multi-order entries properly via a radix_tree_for_each_slot() loop. This was done with a single, somewhat complicated configuration that was meant to test many of the various corner cases having to do with multi-order entries: - An iteration could begin at a sibling entry, and we need to return the canonical entry. - We could have entries of various orders in the same slots[] array. - We could have multi-order entries at a nonzero height, followed by indirect pointers to more radix tree nodes later in that same slots[] array. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/multiorder.c | 92 +++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 0a311a5f39de..ba27fe0a579c 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -92,6 +92,96 @@ static void multiorder_insert_bug(void) item_kill_tree(&tree); } +void multiorder_iteration(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + struct radix_tree_iter iter; + void **slot; + int i, err; + + printf("Multiorder iteration test\n"); + +#define NUM_ENTRIES 11 + int index[NUM_ENTRIES] = {0, 2, 4, 8, 16, 32, 34, 36, 64, 72, 128}; + int order[NUM_ENTRIES] = {1, 1, 2, 3, 4, 1, 0, 1, 3, 0, 7}; + + for (i = 0; i < NUM_ENTRIES; i++) { + err = item_insert_order(&tree, index[i], order[i]); + assert(!err); + } + + i = 0; + /* start from index 1 to verify we find the multi-order entry at 0 */ + radix_tree_for_each_slot(slot, &tree, &iter, 1) { + int height = order[i] / RADIX_TREE_MAP_SHIFT; + int shift = height * RADIX_TREE_MAP_SHIFT; + + assert(iter.index == index[i]); + assert(iter.shift == shift); + i++; + } + + /* + * Now iterate through the tree starting at an elevated multi-order + * entry, beginning at an index in the middle of the range. + */ + i = 8; + radix_tree_for_each_slot(slot, &tree, &iter, 70) { + int height = order[i] / RADIX_TREE_MAP_SHIFT; + int shift = height * RADIX_TREE_MAP_SHIFT; + + assert(iter.index == index[i]); + assert(iter.shift == shift); + i++; + } + + item_kill_tree(&tree); +} + +void multiorder_tagged_iteration(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + struct radix_tree_iter iter; + void **slot; + int i; + + printf("Multiorder tagged iteration test\n"); + +#define MT_NUM_ENTRIES 9 + int index[MT_NUM_ENTRIES] = {0, 2, 4, 16, 32, 40, 64, 72, 128}; + int order[MT_NUM_ENTRIES] = {1, 0, 2, 4, 3, 1, 3, 0, 7}; + +#define TAG_ENTRIES 7 + int tag_index[TAG_ENTRIES] = {0, 4, 16, 40, 64, 72, 128}; + + for (i = 0; i < MT_NUM_ENTRIES; i++) + assert(!item_insert_order(&tree, index[i], order[i])); + + assert(!radix_tree_tagged(&tree, 1)); + + for (i = 0; i < TAG_ENTRIES; i++) + assert(radix_tree_tag_set(&tree, tag_index[i], 1)); + + i = 0; + /* start from index 1 to verify we find the multi-order entry at 0 */ + radix_tree_for_each_tagged(slot, &tree, &iter, 1, 1) { + assert(iter.index == tag_index[i]); + i++; + } + + /* + * Now iterate through the tree starting at an elevated multi-order + * entry, beginning at an index in the middle of the range. + */ + i = 4; + radix_tree_for_each_slot(slot, &tree, &iter, 70) { + assert(iter.index == tag_index[i]); + i++; + } + + item_kill_tree(&tree); +} + void multiorder_checks(void) { int i; @@ -106,4 +196,6 @@ void multiorder_checks(void) multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i); multiorder_insert_bug(); + multiorder_iteration(); + multiorder_tagged_iteration(); } -- cgit From 0fc9b8ca2b1df4948e9516697b1cf12f030968bd Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:41 -0700 Subject: radix-tree test suite: add multi-order tag test Add a generic test for multi-order tag verification, and call it using several different configurations. This test creates a multi-order radix tree using the given index and order, and then sets, checks and clears tags using the indices covered by the single multi-order radix tree entry. With the various calls done by this test we verify root multi-order entries without siblings, multi-order entries without siblings in a radix tree node, as well as multi-order entries with siblings of various sizes. Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/multiorder.c | 97 +++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) (limited to 'tools') diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index ba27fe0a579c..1b6fc9b19930 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -19,6 +19,102 @@ #include "test.h" +#define for_each_index(i, base, order) \ + for (i = base; i < base + (1 << order); i++) + +static void __multiorder_tag_test(int index, int order) +{ + RADIX_TREE(tree, GFP_KERNEL); + int base, err, i; + + /* our canonical entry */ + base = index & ~((1 << order) - 1); + + printf("Multiorder tag test with index %d, canonical entry %d\n", + index, base); + + err = item_insert_order(&tree, index, order); + assert(!err); + + /* + * Verify we get collisions for covered indices. We try and fail to + * insert an exceptional entry so we don't leak memory via + * item_insert_order(). + */ + for_each_index(i, base, order) { + err = __radix_tree_insert(&tree, i, order, + (void *)(0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY)); + assert(err == -EEXIST); + } + + for_each_index(i, base, order) { + assert(!radix_tree_tag_get(&tree, i, 0)); + assert(!radix_tree_tag_get(&tree, i, 1)); + } + + assert(radix_tree_tag_set(&tree, index, 0)); + + for_each_index(i, base, order) { + assert(radix_tree_tag_get(&tree, i, 0)); + assert(!radix_tree_tag_get(&tree, i, 1)); + } + + assert(radix_tree_tag_clear(&tree, index, 0)); + + for_each_index(i, base, order) { + assert(!radix_tree_tag_get(&tree, i, 0)); + assert(!radix_tree_tag_get(&tree, i, 1)); + } + + assert(!radix_tree_tagged(&tree, 0)); + assert(!radix_tree_tagged(&tree, 1)); + + item_kill_tree(&tree); +} + +static void multiorder_tag_tests(void) +{ + /* test multi-order entry for indices 0-7 with no sibling pointers */ + __multiorder_tag_test(0, 3); + __multiorder_tag_test(5, 3); + + /* test multi-order entry for indices 8-15 with no sibling pointers */ + __multiorder_tag_test(8, 3); + __multiorder_tag_test(15, 3); + + /* + * Our order 5 entry covers indices 0-31 in a tree with height=2. + * This is broken up as follows: + * 0-7: canonical entry + * 8-15: sibling 1 + * 16-23: sibling 2 + * 24-31: sibling 3 + */ + __multiorder_tag_test(0, 5); + __multiorder_tag_test(29, 5); + + /* same test, but with indices 32-63 */ + __multiorder_tag_test(32, 5); + __multiorder_tag_test(44, 5); + + /* + * Our order 8 entry covers indices 0-255 in a tree with height=3. + * This is broken up as follows: + * 0-63: canonical entry + * 64-127: sibling 1 + * 128-191: sibling 2 + * 192-255: sibling 3 + */ + __multiorder_tag_test(0, 8); + __multiorder_tag_test(190, 8); + + /* same test, but with indices 256-511 */ + __multiorder_tag_test(256, 8); + __multiorder_tag_test(300, 8); + + __multiorder_tag_test(0x12345678UL, 8); +} + static void multiorder_check(unsigned long index, int order) { unsigned long i; @@ -196,6 +292,7 @@ void multiorder_checks(void) multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i); multiorder_insert_bug(); + multiorder_tag_tests(); multiorder_iteration(); multiorder_tagged_iteration(); } -- cgit From 8a14f4d8328cc8615f8a5487c4173f36a8314796 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:44 -0700 Subject: radix-tree: fix radix_tree_create for sibling entries If the radix tree user attempted to insert a colliding entry with an existing multiorder entry, then radix_tree_create() could encounter a sibling entry when walking down the tree to look for a slot. Use radix_tree_descend() to fix the problem, and add a test-case to make sure the problem doesn't come back in future. Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 4 ++-- tools/testing/radix-tree/multiorder.c | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b1ca74489bc2..9b5d8a963897 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -548,9 +548,9 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, /* Go a level down */ height--; shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; node = indirect_to_ptr(slot); - slot = node->slots[offset]; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + offset = radix_tree_descend(node, &slot, offset); } #ifdef CONFIG_RADIX_TREE_MULTIORDER diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 1b6fc9b19930..fc934578e1ef 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -135,6 +135,11 @@ static void multiorder_check(unsigned long index, int order) item_check_absent(&tree, i); for (i = max; i < 2*max; i++) item_check_absent(&tree, i); + for (i = min; i < max; i++) { + static void *entry = (void *) + (0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY); + assert(radix_tree_insert(&tree, i, entry) == -EEXIST); + } assert(item_delete(&tree, index) != 0); -- cgit From 0a2efc6c809b01872321d9c7e7d82d59ac6fde10 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:46 -0700 Subject: radix-tree: rewrite radix_tree_locate_item Use the new multi-order support functions to rewrite radix_tree_locate_item(). Modify the locate tests to test multiorder entries too. [hughd@google.com: radix_tree_locate_item() is often returning the wrong index] Link: http://lkml.kernel.org/r/alpine.LSU.2.11.1605012108490.1166@eggly.anvils Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 87 ++++++++++++++++++++--------------------- tools/testing/radix-tree/main.c | 30 ++++++++------ 2 files changed, 61 insertions(+), 56 deletions(-) (limited to 'tools') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 9b5d8a963897..8329a2e950eb 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1303,58 +1303,54 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); #if defined(CONFIG_SHMEM) && defined(CONFIG_SWAP) #include /* for cond_resched() */ +struct locate_info { + unsigned long found_index; + bool stop; +}; + /* * This linear search is at present only useful to shmem_unuse_inode(). */ static unsigned long __locate(struct radix_tree_node *slot, void *item, - unsigned long index, unsigned long *found_index) + unsigned long index, struct locate_info *info) { unsigned int shift, height; unsigned long i; height = slot->path & RADIX_TREE_HEIGHT_MASK; - shift = (height-1) * RADIX_TREE_MAP_SHIFT; + shift = height * RADIX_TREE_MAP_SHIFT; - for ( ; height > 1; height--) { - i = (index >> shift) & RADIX_TREE_MAP_MASK; - for (;;) { - if (slot->slots[i] != NULL) - break; - index &= ~((1UL << shift) - 1); - index += 1UL << shift; - if (index == 0) - goto out; /* 32-bit wraparound */ - i++; - if (i == RADIX_TREE_MAP_SIZE) - goto out; - } + do { + shift -= RADIX_TREE_MAP_SHIFT; - slot = rcu_dereference_raw(slot->slots[i]); - if (slot == NULL) - goto out; - if (!radix_tree_is_indirect_ptr(slot)) { - if (slot == item) { - *found_index = index + i; - index = 0; - } else { - index += shift; + for (i = (index >> shift) & RADIX_TREE_MAP_MASK; + i < RADIX_TREE_MAP_SIZE; + i++, index += (1UL << shift)) { + struct radix_tree_node *node = + rcu_dereference_raw(slot->slots[i]); + if (node == RADIX_TREE_RETRY) + goto out; + if (!radix_tree_is_indirect_ptr(node)) { + if (node == item) { + info->found_index = index; + info->stop = true; + goto out; + } + continue; } - goto out; + node = indirect_to_ptr(node); + if (is_sibling_entry(slot, node)) + continue; + slot = node; + break; } - slot = indirect_to_ptr(slot); - shift -= RADIX_TREE_MAP_SHIFT; - } + if (i == RADIX_TREE_MAP_SIZE) + break; + } while (shift); - /* Bottom level: check items */ - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { - if (slot->slots[i] == item) { - *found_index = index + i; - index = 0; - goto out; - } - } - index += RADIX_TREE_MAP_SIZE; out: + if ((index == 0) && (i == RADIX_TREE_MAP_SIZE)) + info->stop = true; return index; } @@ -1372,7 +1368,10 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) struct radix_tree_node *node; unsigned long max_index; unsigned long cur_index = 0; - unsigned long found_index = -1; + struct locate_info info = { + .found_index = -1, + .stop = false, + }; do { rcu_read_lock(); @@ -1380,24 +1379,24 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) if (!radix_tree_is_indirect_ptr(node)) { rcu_read_unlock(); if (node == item) - found_index = 0; + info.found_index = 0; break; } node = indirect_to_ptr(node); - max_index = radix_tree_maxindex(node->path & - RADIX_TREE_HEIGHT_MASK); + + max_index = node_maxindex(node); if (cur_index > max_index) { rcu_read_unlock(); break; } - cur_index = __locate(node, item, cur_index, &found_index); + cur_index = __locate(node, item, cur_index, &info); rcu_read_unlock(); cond_resched(); - } while (cur_index != 0 && cur_index <= max_index); + } while (!info.stop && cur_index <= max_index); - return found_index; + return info.found_index; } #else unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index b6a700b00cce..65231e9ba3e8 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -232,17 +232,18 @@ void copy_tag_check(void) item_kill_tree(&tree); } -void __locate_check(struct radix_tree_root *tree, unsigned long index) +void __locate_check(struct radix_tree_root *tree, unsigned long index, + unsigned order) { struct item *item; unsigned long index2; - item_insert(tree, index); + item_insert_order(tree, index, order); item = item_lookup(tree, index); index2 = radix_tree_locate_item(tree, item); if (index != index2) { - printf("index %ld inserted; found %ld\n", - index, index2); + printf("index %ld order %d inserted; found %ld\n", + index, order, index2); abort(); } } @@ -250,21 +251,26 @@ void __locate_check(struct radix_tree_root *tree, unsigned long index) static void locate_check(void) { RADIX_TREE(tree, GFP_KERNEL); + unsigned order; unsigned long offset, index; - for (offset = 0; offset < (1 << 3); offset++) { - for (index = 0; index < (1UL << 5); index++) { - __locate_check(&tree, index + offset); - } - if (radix_tree_locate_item(&tree, &tree) != -1) - abort(); + for (order = 0; order < 20; order++) { + for (offset = 0; offset < (1 << (order + 3)); + offset += (1UL << order)) { + for (index = 0; index < (1UL << (order + 5)); + index += (1UL << order)) { + __locate_check(&tree, index + offset, order); + } + if (radix_tree_locate_item(&tree, &tree) != -1) + abort(); - item_kill_tree(&tree); + item_kill_tree(&tree); + } } if (radix_tree_locate_item(&tree, &tree) != -1) abort(); - __locate_check(&tree, -1); + __locate_check(&tree, -1, 0); if (radix_tree_locate_item(&tree, &tree) != -1) abort(); item_kill_tree(&tree); -- cgit From eb73f7f3300c144c4b886dd56ea4c3d2b2d58249 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:49 -0700 Subject: radix-tree: add test for radix_tree_locate_item() Add a unit test that provides coverage for the bug fixed in the commit entitled "radix-tree: rewrite radix_tree_locate_item fix" from Hugh Dickins. I've verified that this test fails before his patch due to miscalculated 'index' values in __locate() in lib/radix-tree.c, and passes with his fix. Link: http://lkml.kernel.org/r/1462307263-20623-1-git-send-email-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/linux/init.h | 1 + tools/testing/radix-tree/main.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 tools/testing/radix-tree/linux/init.h (limited to 'tools') diff --git a/tools/testing/radix-tree/linux/init.h b/tools/testing/radix-tree/linux/init.h new file mode 100644 index 000000000000..360cabb3c4e7 --- /dev/null +++ b/tools/testing/radix-tree/linux/init.h @@ -0,0 +1 @@ +/* An empty file stub that allows radix-tree.c to compile. */ diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index 65231e9ba3e8..b7619ff3b552 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -232,7 +232,7 @@ void copy_tag_check(void) item_kill_tree(&tree); } -void __locate_check(struct radix_tree_root *tree, unsigned long index, +static void __locate_check(struct radix_tree_root *tree, unsigned long index, unsigned order) { struct item *item; @@ -248,12 +248,25 @@ void __locate_check(struct radix_tree_root *tree, unsigned long index, } } +static void __order_0_locate_check(void) +{ + RADIX_TREE(tree, GFP_KERNEL); + int i; + + for (i = 0; i < 50; i++) + __locate_check(&tree, rand() % INT_MAX, 0); + + item_kill_tree(&tree); +} + static void locate_check(void) { RADIX_TREE(tree, GFP_KERNEL); unsigned order; unsigned long offset, index; + __order_0_locate_check(); + for (order = 0; order < 20; order++) { for (offset = 0; offset < (1 << (order + 3)); offset += (1UL << order)) { -- cgit From 070c5ac2740b5db89d381a09fb03b2480b2f7a74 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:02:52 -0700 Subject: radix-tree: fix radix_tree_range_tag_if_tagged() for multiorder entries I had previously decided that tagging a single multiorder entry would count as tagging 2^order entries for the purposes of 'nr_to_tag'. I now believe that decision to be a mistake, and it should count as a single entry. That's more likely to be what callers expect. When walking back up the tree from a newly-tagged entry, the current code assumed we were starting from the lowest level of the tree; if we have a multiorder entry with an order at least RADIX_TREE_MAP_SHIFT in size then we need to shift the index by 'shift' before we start walking back up the tree, or we will end up not setting tags on higher entries, and then mistakenly thinking that entries below a certain point in the tree are not tagged. If the first index we examine is a sibling entry of a tagged multiorder entry, we were not tagging it. We need to examine the canonical entry, and the easiest way to do that is to use radix_tree_descend(). We then have to skip over sibling slots when looking for the next entry in the tree or we will end up walking back to the canonical entry. Add several tests for radix_tree_range_tag_if_tagged(). Signed-off-by: Matthew Wilcox Reviewed-by: Ross Zwisler Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 76 +++++++++++++++-------------------- tools/testing/radix-tree/multiorder.c | 25 +++++++++++- tools/testing/radix-tree/tag_check.c | 10 +++++ 3 files changed, 67 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8329a2e950eb..8df0df2835b4 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1033,14 +1033,13 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned long nr_to_tag, unsigned int iftag, unsigned int settag) { - unsigned int height = root->height; - struct radix_tree_node *node = NULL; - struct radix_tree_node *slot; - unsigned int shift; + struct radix_tree_node *slot, *node = NULL; + unsigned long maxindex; + unsigned int shift = radix_tree_load_root(root, &slot, &maxindex); unsigned long tagged = 0; unsigned long index = *first_indexp; - last_index = min(last_index, radix_tree_maxindex(height)); + last_index = min(last_index, maxindex); if (index > last_index) return 0; if (!nr_to_tag) @@ -1049,80 +1048,71 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, *first_indexp = last_index + 1; return 0; } - if (height == 0) { + if (!radix_tree_is_indirect_ptr(slot)) { *first_indexp = last_index + 1; root_tag_set(root, settag); return 1; } - shift = (height - 1) * RADIX_TREE_MAP_SHIFT; - slot = indirect_to_ptr(root->rnode); + node = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; for (;;) { unsigned long upindex; - int offset; + unsigned offset; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - if (!slot->slots[offset]) + offset = radix_tree_descend(node, &slot, offset); + if (!slot) goto next; - if (!tag_get(slot, iftag, offset)) + if (!tag_get(node, iftag, offset)) goto next; - if (shift) { - node = slot; - slot = slot->slots[offset]; - if (radix_tree_is_indirect_ptr(slot)) { - slot = indirect_to_ptr(slot); - shift -= RADIX_TREE_MAP_SHIFT; - continue; - } else { - slot = node; - node = node->parent; - } + /* Sibling slots never have tags set on them */ + if (radix_tree_is_indirect_ptr(slot)) { + node = indirect_to_ptr(slot); + shift -= RADIX_TREE_MAP_SHIFT; + continue; } /* tag the leaf */ - tagged += 1 << shift; - tag_set(slot, settag, offset); + tagged++; + tag_set(node, settag, offset); + slot = node->parent; /* walk back up the path tagging interior nodes */ - upindex = index; - while (node) { + upindex = index >> shift; + while (slot) { upindex >>= RADIX_TREE_MAP_SHIFT; offset = upindex & RADIX_TREE_MAP_MASK; /* stop if we find a node with the tag already set */ - if (tag_get(node, settag, offset)) + if (tag_get(slot, settag, offset)) break; - tag_set(node, settag, offset); - node = node->parent; + tag_set(slot, settag, offset); + slot = slot->parent; } - /* - * Small optimization: now clear that node pointer. - * Since all of this slot's ancestors now have the tag set - * from setting it above, we have no further need to walk - * back up the tree setting tags, until we update slot to - * point to another radix_tree_node. - */ - node = NULL; - -next: + next: /* Go to next item at level determined by 'shift' */ index = ((index >> shift) + 1) << shift; /* Overflow can happen when last_index is ~0UL... */ if (index > last_index || !index) break; - if (tagged >= nr_to_tag) - break; - while (((index >> shift) & RADIX_TREE_MAP_MASK) == 0) { + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + while (offset == 0) { /* * We've fully scanned this node. Go up. Because * last_index is guaranteed to be in the tree, what * we do below cannot wander astray. */ - slot = slot->parent; + node = node->parent; shift += RADIX_TREE_MAP_SHIFT; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; } + if (is_sibling_entry(node, node->slots[offset])) + goto next; + if (tagged >= nr_to_tag) + break; } /* * We need not to tag the root tag if there is no tag which is set with diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index fc934578e1ef..c061f4bd6c05 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -26,6 +26,7 @@ static void __multiorder_tag_test(int index, int order) { RADIX_TREE(tree, GFP_KERNEL); int base, err, i; + unsigned long first = 0; /* our canonical entry */ base = index & ~((1 << order) - 1); @@ -59,13 +60,16 @@ static void __multiorder_tag_test(int index, int order) assert(!radix_tree_tag_get(&tree, i, 1)); } + assert(radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, 10, 0, 1) == 1); assert(radix_tree_tag_clear(&tree, index, 0)); for_each_index(i, base, order) { assert(!radix_tree_tag_get(&tree, i, 0)); - assert(!radix_tree_tag_get(&tree, i, 1)); + assert(radix_tree_tag_get(&tree, i, 1)); } + assert(radix_tree_tag_clear(&tree, index, 1)); + assert(!radix_tree_tagged(&tree, 0)); assert(!radix_tree_tagged(&tree, 1)); @@ -244,6 +248,7 @@ void multiorder_tagged_iteration(void) RADIX_TREE(tree, GFP_KERNEL); struct radix_tree_iter iter; void **slot; + unsigned long first = 0; int i; printf("Multiorder tagged iteration test\n"); @@ -280,6 +285,24 @@ void multiorder_tagged_iteration(void) i++; } + radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, + MT_NUM_ENTRIES, 1, 2); + + i = 0; + radix_tree_for_each_tagged(slot, &tree, &iter, 1, 2) { + assert(iter.index == tag_index[i]); + i++; + } + + first = 1; + radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, + MT_NUM_ENTRIES, 1, 0); + i = 0; + radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) { + assert(iter.index == tag_index[i]); + i++; + } + item_kill_tree(&tree); } diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index 83136be552a0..b7447ceb75e9 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -12,6 +12,7 @@ static void __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag) { + unsigned long first = 0; int ret; item_check_absent(tree, index); @@ -22,6 +23,10 @@ __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag) item_tag_set(tree, index, tag); ret = item_tag_get(tree, index, tag); assert(ret != 0); + ret = radix_tree_range_tag_if_tagged(tree, &first, ~0UL, 10, tag, !tag); + assert(ret == 1); + ret = item_tag_get(tree, index, !tag); + assert(ret != 0); ret = item_delete(tree, index); assert(ret != 0); item_insert(tree, index); @@ -304,6 +309,7 @@ static void single_check(void) struct item *items[BATCH]; RADIX_TREE(tree, GFP_KERNEL); int ret; + unsigned long first = 0; item_insert(&tree, 0); item_tag_set(&tree, 0, 0); @@ -313,6 +319,10 @@ static void single_check(void) assert(ret == 0); verify_tag_consistency(&tree, 0); verify_tag_consistency(&tree, 1); + ret = radix_tree_range_tag_if_tagged(&tree, &first, 10, 10, 0, 1); + assert(ret == 1); + ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1); + assert(ret == 1); item_kill_tree(&tree); } -- cgit From 0796c58325533f87c00949a545eb607baa8441cb Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 20 May 2016 17:02:55 -0700 Subject: radix-tree: fix radix_tree_dump() for multi-order entries - Print which indices are covered by every leaf entry - Print sibling entries - Print the node pointer instead of the slot entry - Build by default in userspace, and make it accessible to the test-suite Signed-off-by: Ross Zwisler Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 48 +++++++++++++++++++++++++---------------- tools/testing/radix-tree/test.h | 1 + 2 files changed, 30 insertions(+), 19 deletions(-) (limited to 'tools') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8df0df2835b4..a1a44f94c171 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -215,27 +215,36 @@ radix_tree_find_next_bit(const unsigned long *addr, return size; } -#if 0 -static void dump_node(void *slot, int height, int offset) +#ifndef __KERNEL__ +static void dump_node(struct radix_tree_node *node, unsigned offset, + unsigned shift, unsigned long index) { - struct radix_tree_node *node; - int i; - - if (!slot) - return; - - if (height == 0) { - pr_debug("radix entry %p offset %d\n", slot, offset); - return; - } + unsigned long i; - node = indirect_to_ptr(slot); pr_debug("radix node: %p offset %d tags %lx %lx %lx path %x count %d parent %p\n", - slot, offset, node->tags[0][0], node->tags[1][0], - node->tags[2][0], node->path, node->count, node->parent); - - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) - dump_node(node->slots[i], height - 1, i); + node, offset, + node->tags[0][0], node->tags[1][0], node->tags[2][0], + node->path, node->count, node->parent); + + for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { + unsigned long first = index | (i << shift); + unsigned long last = first | ((1UL << shift) - 1); + void *entry = node->slots[i]; + if (!entry) + continue; + if (is_sibling_entry(node, entry)) { + pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n", + entry, i, + *(void **)indirect_to_ptr(entry), + first, last); + } else if (!radix_tree_is_indirect_ptr(entry)) { + pr_debug("radix entry %p offset %ld indices %ld-%ld\n", + entry, i, first, last); + } else { + dump_node(indirect_to_ptr(entry), i, + shift - RADIX_TREE_MAP_SHIFT, first); + } + } } /* For debug */ @@ -246,7 +255,8 @@ static void radix_tree_dump(struct radix_tree_root *root) root->gfp_mask >> __GFP_BITS_SHIFT); if (!radix_tree_is_indirect_ptr(root->rnode)) return; - dump_node(root->rnode, root->height, 0); + dump_node(indirect_to_ptr(root->rnode), 0, + (root->height - 1) * RADIX_TREE_MAP_SHIFT, 0); } #endif diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 53cb595db44a..67217c93fe95 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -40,5 +40,6 @@ extern int nr_allocated; /* Normally private parts of lib/radix-tree.c */ void *indirect_to_ptr(void *ptr); +void radix_tree_dump(struct radix_tree_root *root); int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long radix_tree_maxindex(unsigned int height); -- cgit From 0694f0c9e20c47063e4237e5f6649ae5ce5a369a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:16 -0700 Subject: radix tree test suite: remove dependencies on height verify_node() can use node->shift instead of the height. tree_verify_min_height() can be converted over to using node_maxindex() and shift_maxindex() instead of radix_tree_maxindex(). Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/test.c | 34 +++++++++++++++++++++++----------- tools/testing/radix-tree/test.h | 3 ++- 2 files changed, 25 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index da54f11e8ba7..3004c58b9021 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -143,7 +143,7 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start, } static int verify_node(struct radix_tree_node *slot, unsigned int tag, - unsigned int height, int tagged) + int tagged) { int anyset = 0; int i; @@ -159,7 +159,8 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, } } if (tagged != anyset) { - printf("tag: %u, height %u, tagged: %d, anyset: %d\n", tag, height, tagged, anyset); + printf("tag: %u, shift %u, tagged: %d, anyset: %d\n", + tag, slot->shift, tagged, anyset); for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) { printf("tag %d: ", j); for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) @@ -171,10 +172,10 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, assert(tagged == anyset); /* Go for next level */ - if (height > 1) { + if (slot->shift > 0) { for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) if (slot->slots[i]) - if (verify_node(slot->slots[i], tag, height - 1, + if (verify_node(slot->slots[i], tag, !!test_bit(i, slot->tags[tag]))) { printf("Failure at off %d\n", i); for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) { @@ -191,9 +192,10 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag) { - if (!root->height) + struct radix_tree_node *node = root->rnode; + if (!radix_tree_is_indirect_ptr(node)) return; - verify_node(root->rnode, tag, root->height, !!root_tag_get(root, tag)); + verify_node(node, tag, !!root_tag_get(root, tag)); } void item_kill_tree(struct radix_tree_root *root) @@ -218,9 +220,19 @@ void item_kill_tree(struct radix_tree_root *root) void tree_verify_min_height(struct radix_tree_root *root, int maxindex) { - assert(radix_tree_maxindex(root->height) >= maxindex); - if (root->height > 1) - assert(radix_tree_maxindex(root->height-1) < maxindex); - else if (root->height == 1) - assert(radix_tree_maxindex(root->height-1) <= maxindex); + unsigned shift; + struct radix_tree_node *node = root->rnode; + if (!radix_tree_is_indirect_ptr(node)) { + assert(maxindex == 0); + return; + } + + node = indirect_to_ptr(node); + assert(maxindex <= node_maxindex(node)); + + shift = node->shift; + if (shift > 0) + assert(maxindex > shift_maxindex(shift - RADIX_TREE_MAP_SHIFT)); + else + assert(maxindex > 0); } diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 67217c93fe95..866c8c676aa4 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -42,4 +42,5 @@ extern int nr_allocated; void *indirect_to_ptr(void *ptr); void radix_tree_dump(struct radix_tree_root *root); int root_tag_get(struct radix_tree_root *root, unsigned int tag); -unsigned long radix_tree_maxindex(unsigned int height); +unsigned long node_maxindex(struct radix_tree_node *); +unsigned long shift_maxindex(unsigned int shift); -- cgit From 4dd6c0987ca43d6544f4f0a3f86f6ea3bfc60fc1 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:27 -0700 Subject: radix-tree: rename indirect_to_ptr() to entry_to_node() Mirrors the earlier commit introducing node_to_entry(). Also change the type returned to be a struct radix_tree_node pointer. That lets us simplify a couple of places in the radix tree shrink & extend paths where we could convert an entry into a pointer, modify the node, then convert the pointer back into an entry. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 12 +++++------ lib/radix-tree.c | 48 ++++++++++++++++++----------------------- tools/testing/radix-tree/test.c | 4 ++-- tools/testing/radix-tree/test.h | 1 - 4 files changed, 28 insertions(+), 37 deletions(-) (limited to 'tools') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c8cc879046c7..b94aa198dd6b 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -442,7 +442,7 @@ radix_tree_chunk_size(struct radix_tree_iter *iter) return (iter->next_index - iter->index) >> iter_shift(iter); } -static inline void *indirect_to_ptr(void *ptr) +static inline struct radix_tree_node *entry_to_node(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INTERNAL_NODE); } @@ -469,7 +469,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) return NULL; while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && radix_tree_is_indirect_ptr(slot[1])) { - if (indirect_to_ptr(slot[1]) == canon) { + if (entry_to_node(slot[1]) == canon) { iter->tags >>= 1; iter->index = __radix_tree_iter_add(iter, 1); slot++; @@ -499,12 +499,10 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && radix_tree_is_indirect_ptr(*slot)) { - if (indirect_to_ptr(*slot) == canon) + if (entry_to_node(*slot) == canon) continue; - else { - iter->next_index = iter->index; - break; - } + iter->next_index = iter->index; + break; } if (likely(*slot)) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f66bb3932452..3c3fdd9c5bb3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -230,13 +230,13 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) if (is_sibling_entry(node, entry)) { pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n", entry, i, - *(void **)indirect_to_ptr(entry), + *(void **)entry_to_node(entry), first, last); } else if (!radix_tree_is_indirect_ptr(entry)) { pr_debug("radix entry %p offset %ld indices %ld-%ld\n", entry, i, first, last); } else { - dump_node(indirect_to_ptr(entry), first); + dump_node(entry_to_node(entry), first); } } } @@ -249,7 +249,7 @@ static void radix_tree_dump(struct radix_tree_root *root) root->gfp_mask >> __GFP_BITS_SHIFT); if (!radix_tree_is_indirect_ptr(root->rnode)) return; - dump_node(indirect_to_ptr(root->rnode), 0); + dump_node(entry_to_node(root->rnode), 0); } #endif @@ -422,7 +422,7 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root, *nodep = node; if (likely(radix_tree_is_indirect_ptr(node))) { - node = indirect_to_ptr(node); + node = entry_to_node(node); *maxindex = node_maxindex(node); return node->shift + RADIX_TREE_MAP_SHIFT; } @@ -467,11 +467,8 @@ static int radix_tree_extend(struct radix_tree_root *root, node->offset = 0; node->count = 1; node->parent = NULL; - if (radix_tree_is_indirect_ptr(slot)) { - slot = indirect_to_ptr(slot); - slot->parent = node; - slot = node_to_entry(slot); - } + if (radix_tree_is_indirect_ptr(slot)) + entry_to_node(slot)->parent = node; node->slots[0] = slot; slot = node_to_entry(node); rcu_assign_pointer(root->rnode, slot); @@ -542,7 +539,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, break; /* Go a level down */ - node = indirect_to_ptr(slot); + node = entry_to_node(slot); offset = (index >> shift) & RADIX_TREE_MAP_MASK; offset = radix_tree_descend(node, &slot, offset); } @@ -645,7 +642,7 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, if (node == RADIX_TREE_RETRY) goto restart; - parent = indirect_to_ptr(node); + parent = entry_to_node(node); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; offset = radix_tree_descend(parent, &node, offset); @@ -729,7 +726,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - parent = indirect_to_ptr(node); + parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, offset); BUG_ON(!node); @@ -777,7 +774,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - parent = indirect_to_ptr(node); + parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, offset); } @@ -844,7 +841,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; - parent = indirect_to_ptr(node); + parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, offset); if (!node) @@ -904,7 +901,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, return NULL; if (radix_tree_is_indirect_ptr(rnode)) { - rnode = indirect_to_ptr(rnode); + rnode = entry_to_node(rnode); } else if (rnode) { /* Single-slot tree */ iter->index = index; @@ -963,7 +960,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if (!radix_tree_is_indirect_ptr(slot)) break; - node = indirect_to_ptr(slot); + node = entry_to_node(slot); shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; } @@ -1048,7 +1045,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, return 1; } - node = indirect_to_ptr(slot); + node = entry_to_node(slot); shift -= RADIX_TREE_MAP_SHIFT; for (;;) { @@ -1063,7 +1060,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, goto next; /* Sibling slots never have tags set on them */ if (radix_tree_is_indirect_ptr(slot)) { - node = indirect_to_ptr(slot); + node = entry_to_node(slot); shift -= RADIX_TREE_MAP_SHIFT; continue; } @@ -1322,7 +1319,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, } continue; } - node = indirect_to_ptr(node); + node = entry_to_node(node); if (is_sibling_entry(slot, node)) continue; slot = node; @@ -1367,7 +1364,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) break; } - node = indirect_to_ptr(node); + node = entry_to_node(node); max_index = node_maxindex(node); if (cur_index > max_index) { @@ -1403,7 +1400,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) if (!radix_tree_is_indirect_ptr(to_free)) break; - to_free = indirect_to_ptr(to_free); + to_free = entry_to_node(to_free); /* * The candidate node has more than one child, or its child @@ -1418,11 +1415,8 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) if (!radix_tree_is_indirect_ptr(slot) && to_free->shift) break; - if (radix_tree_is_indirect_ptr(slot)) { - slot = indirect_to_ptr(slot); - slot->parent = NULL; - slot = node_to_entry(slot); - } + if (radix_tree_is_indirect_ptr(slot)) + entry_to_node(slot)->parent = NULL; /* * We don't need rcu_assign_pointer(), since we are simply @@ -1481,7 +1475,7 @@ bool __radix_tree_delete_node(struct radix_tree_root *root, struct radix_tree_node *parent; if (node->count) { - if (node == indirect_to_ptr(root->rnode)) + if (node == entry_to_node(root->rnode)) deleted |= radix_tree_shrink(root); return deleted; } diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 3004c58b9021..7b0bc1fa5919 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -149,7 +149,7 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, int i; int j; - slot = indirect_to_ptr(slot); + slot = entry_to_node(slot); /* Verify consistency at this level */ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) { @@ -227,7 +227,7 @@ void tree_verify_min_height(struct radix_tree_root *root, int maxindex) return; } - node = indirect_to_ptr(node); + node = entry_to_node(node); assert(maxindex <= node_maxindex(node)); shift = node->shift; diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 866c8c676aa4..e85131369723 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -39,7 +39,6 @@ void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag); extern int nr_allocated; /* Normally private parts of lib/radix-tree.c */ -void *indirect_to_ptr(void *ptr); void radix_tree_dump(struct radix_tree_root *root); int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long node_maxindex(struct radix_tree_node *); -- cgit From b194d16c27af905d6e3552f4851bc7d9fee4e90f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:30 -0700 Subject: radix-tree: rename radix_tree_is_indirect_ptr() As with indirect_to_ptr(), ptr_to_indirect() and RADIX_TREE_INDIRECT_PTR, change radix_tree_is_indirect_ptr() to radix_tree_is_internal_node(). Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 10 ++++----- lib/radix-tree.c | 48 ++++++++++++++++++++--------------------- tools/testing/radix-tree/test.c | 4 ++-- 3 files changed, 31 insertions(+), 31 deletions(-) (limited to 'tools') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index b94aa198dd6b..bad63105e37e 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -57,7 +57,7 @@ #define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \ RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE))) -static inline int radix_tree_is_indirect_ptr(void *ptr) +static inline int radix_tree_is_internal_node(void *ptr) { return (int)((unsigned long)ptr & RADIX_TREE_INTERNAL_NODE); } @@ -224,7 +224,7 @@ static inline void *radix_tree_deref_slot_protected(void **pslot, */ static inline int radix_tree_deref_retry(void *arg) { - return unlikely(radix_tree_is_indirect_ptr(arg)); + return unlikely(radix_tree_is_internal_node(arg)); } /** @@ -259,7 +259,7 @@ static inline int radix_tree_exception(void *arg) */ static inline void radix_tree_replace_slot(void **pslot, void *item) { - BUG_ON(radix_tree_is_indirect_ptr(item)); + BUG_ON(radix_tree_is_internal_node(item)); rcu_assign_pointer(*pslot, item); } @@ -468,7 +468,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) if (unlikely(!iter->tags)) return NULL; while (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && - radix_tree_is_indirect_ptr(slot[1])) { + radix_tree_is_internal_node(slot[1])) { if (entry_to_node(slot[1]) == canon) { iter->tags >>= 1; iter->index = __radix_tree_iter_add(iter, 1); @@ -498,7 +498,7 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) iter->index = __radix_tree_iter_add(iter, 1); if (IS_ENABLED(CONFIG_RADIX_TREE_MULTIORDER) && - radix_tree_is_indirect_ptr(*slot)) { + radix_tree_is_internal_node(*slot)) { if (entry_to_node(*slot) == canon) continue; iter->next_index = iter->index; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3c3fdd9c5bb3..b65c83036ca4 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -100,7 +100,7 @@ static unsigned radix_tree_descend(struct radix_tree_node *parent, void **entry = rcu_dereference_raw(parent->slots[offset]); #ifdef CONFIG_RADIX_TREE_MULTIORDER - if (radix_tree_is_indirect_ptr(entry)) { + if (radix_tree_is_internal_node(entry)) { unsigned long siboff = get_slot_offset(parent, entry); if (siboff < RADIX_TREE_MAP_SIZE) { offset = siboff; @@ -232,7 +232,7 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) entry, i, *(void **)entry_to_node(entry), first, last); - } else if (!radix_tree_is_indirect_ptr(entry)) { + } else if (!radix_tree_is_internal_node(entry)) { pr_debug("radix entry %p offset %ld indices %ld-%ld\n", entry, i, first, last); } else { @@ -247,7 +247,7 @@ static void radix_tree_dump(struct radix_tree_root *root) pr_debug("radix root: %p rnode %p tags %x\n", root, root->rnode, root->gfp_mask >> __GFP_BITS_SHIFT); - if (!radix_tree_is_indirect_ptr(root->rnode)) + if (!radix_tree_is_internal_node(root->rnode)) return; dump_node(entry_to_node(root->rnode), 0); } @@ -302,7 +302,7 @@ radix_tree_node_alloc(struct radix_tree_root *root) ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask | __GFP_ACCOUNT); out: - BUG_ON(radix_tree_is_indirect_ptr(ret)); + BUG_ON(radix_tree_is_internal_node(ret)); return ret; } @@ -421,7 +421,7 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root, *nodep = node; - if (likely(radix_tree_is_indirect_ptr(node))) { + if (likely(radix_tree_is_internal_node(node))) { node = entry_to_node(node); *maxindex = node_maxindex(node); return node->shift + RADIX_TREE_MAP_SHIFT; @@ -467,7 +467,7 @@ static int radix_tree_extend(struct radix_tree_root *root, node->offset = 0; node->count = 1; node->parent = NULL; - if (radix_tree_is_indirect_ptr(slot)) + if (radix_tree_is_internal_node(slot)) entry_to_node(slot)->parent = node; node->slots[0] = slot; slot = node_to_entry(node); @@ -535,7 +535,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, } else rcu_assign_pointer(root->rnode, node_to_entry(slot)); - } else if (!radix_tree_is_indirect_ptr(slot)) + } else if (!radix_tree_is_internal_node(slot)) break; /* Go a level down */ @@ -585,7 +585,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, void **slot; int error; - BUG_ON(radix_tree_is_indirect_ptr(item)); + BUG_ON(radix_tree_is_internal_node(item)); error = __radix_tree_create(root, index, order, &node, &slot); if (error) @@ -637,7 +637,7 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, if (index > maxindex) return NULL; - while (radix_tree_is_indirect_ptr(node)) { + while (radix_tree_is_internal_node(node)) { unsigned offset; if (node == RADIX_TREE_RETRY) @@ -720,7 +720,7 @@ void *radix_tree_tag_set(struct radix_tree_root *root, shift = radix_tree_load_root(root, &node, &maxindex); BUG_ON(index > maxindex); - while (radix_tree_is_indirect_ptr(node)) { + while (radix_tree_is_internal_node(node)) { unsigned offset; shift -= RADIX_TREE_MAP_SHIFT; @@ -770,7 +770,7 @@ void *radix_tree_tag_clear(struct radix_tree_root *root, parent = NULL; - while (radix_tree_is_indirect_ptr(node)) { + while (radix_tree_is_internal_node(node)) { shift -= RADIX_TREE_MAP_SHIFT; offset = (index >> shift) & RADIX_TREE_MAP_MASK; @@ -835,7 +835,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, if (node == NULL) return 0; - while (radix_tree_is_indirect_ptr(node)) { + while (radix_tree_is_internal_node(node)) { int offset; shift -= RADIX_TREE_MAP_SHIFT; @@ -900,7 +900,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if (index > maxindex) return NULL; - if (radix_tree_is_indirect_ptr(rnode)) { + if (radix_tree_is_internal_node(rnode)) { rnode = entry_to_node(rnode); } else if (rnode) { /* Single-slot tree */ @@ -957,7 +957,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if ((slot == NULL) || (slot == RADIX_TREE_RETRY)) goto restart; - if (!radix_tree_is_indirect_ptr(slot)) + if (!radix_tree_is_internal_node(slot)) break; node = entry_to_node(slot); @@ -1039,7 +1039,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, *first_indexp = last_index + 1; return 0; } - if (!radix_tree_is_indirect_ptr(slot)) { + if (!radix_tree_is_internal_node(slot)) { *first_indexp = last_index + 1; root_tag_set(root, settag); return 1; @@ -1059,7 +1059,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, if (!tag_get(node, iftag, offset)) goto next; /* Sibling slots never have tags set on them */ - if (radix_tree_is_indirect_ptr(slot)) { + if (radix_tree_is_internal_node(slot)) { node = entry_to_node(slot); shift -= RADIX_TREE_MAP_SHIFT; continue; @@ -1152,7 +1152,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results, results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; - if (radix_tree_is_indirect_ptr(results[ret])) { + if (radix_tree_is_internal_node(results[ret])) { slot = radix_tree_iter_retry(&iter); continue; } @@ -1235,7 +1235,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results, results[ret] = rcu_dereference_raw(*slot); if (!results[ret]) continue; - if (radix_tree_is_indirect_ptr(results[ret])) { + if (radix_tree_is_internal_node(results[ret])) { slot = radix_tree_iter_retry(&iter); continue; } @@ -1311,7 +1311,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, rcu_dereference_raw(slot->slots[i]); if (node == RADIX_TREE_RETRY) goto out; - if (!radix_tree_is_indirect_ptr(node)) { + if (!radix_tree_is_internal_node(node)) { if (node == item) { info->found_index = index; info->stop = true; @@ -1357,7 +1357,7 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) do { rcu_read_lock(); node = rcu_dereference_raw(root->rnode); - if (!radix_tree_is_indirect_ptr(node)) { + if (!radix_tree_is_internal_node(node)) { rcu_read_unlock(); if (node == item) info.found_index = 0; @@ -1398,7 +1398,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) struct radix_tree_node *to_free = root->rnode; struct radix_tree_node *slot; - if (!radix_tree_is_indirect_ptr(to_free)) + if (!radix_tree_is_internal_node(to_free)) break; to_free = entry_to_node(to_free); @@ -1412,10 +1412,10 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) slot = to_free->slots[0]; if (!slot) break; - if (!radix_tree_is_indirect_ptr(slot) && to_free->shift) + if (!radix_tree_is_internal_node(slot) && to_free->shift) break; - if (radix_tree_is_indirect_ptr(slot)) + if (radix_tree_is_internal_node(slot)) entry_to_node(slot)->parent = NULL; /* @@ -1445,7 +1445,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) * also results in a stale slot). So tag the slot as indirect * to force callers to retry. */ - if (!radix_tree_is_indirect_ptr(slot)) + if (!radix_tree_is_internal_node(slot)) to_free->slots[0] = RADIX_TREE_RETRY; radix_tree_node_free(to_free); diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 7b0bc1fa5919..a6e8099eaf4f 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -193,7 +193,7 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag) { struct radix_tree_node *node = root->rnode; - if (!radix_tree_is_indirect_ptr(node)) + if (!radix_tree_is_internal_node(node)) return; verify_node(node, tag, !!root_tag_get(root, tag)); } @@ -222,7 +222,7 @@ void tree_verify_min_height(struct radix_tree_root *root, int maxindex) { unsigned shift; struct radix_tree_node *node = root->rnode; - if (!radix_tree_is_indirect_ptr(node)) { + if (!radix_tree_is_internal_node(node)) { assert(maxindex == 0); return; } -- cgit From 8c1244de00ef98f73e21eecc42d84b2742fbb4f9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 20 May 2016 17:03:36 -0700 Subject: radix-tree: tidy up next_chunk Convert radix_tree_next_chunk to use 'child' instead of 'slot' as the name of the child node. Also use node_maxindex() where it makes sense. The 'rnode' variable was unnecessary; it doesn't overlap in usage with 'node', so we can just use 'node' the whole way through the function. Improve the testcase to start the walk from every index in the carefully constructed tree, and to accept any index within the range covered by the entry. Signed-off-by: Matthew Wilcox Cc: Konstantin Khlebnikov Cc: Kirill Shutemov Cc: Jan Kara Cc: Neil Brown Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 53 +++++++------------ tools/testing/radix-tree/multiorder.c | 99 +++++++++++++++++++---------------- 2 files changed, 74 insertions(+), 78 deletions(-) (limited to 'tools') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 4b4a2a20a3d1..c42867a1769a 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -876,7 +876,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, struct radix_tree_iter *iter, unsigned flags) { unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK; - struct radix_tree_node *rnode, *node; + struct radix_tree_node *node, *child; unsigned long index, offset, maxindex; if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) @@ -896,38 +896,29 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, return NULL; restart: - shift = radix_tree_load_root(root, &rnode, &maxindex); + shift = radix_tree_load_root(root, &child, &maxindex); if (index > maxindex) return NULL; + if (!child) + return NULL; - if (radix_tree_is_internal_node(rnode)) { - rnode = entry_to_node(rnode); - } else if (rnode) { + if (!radix_tree_is_internal_node(child)) { /* Single-slot tree */ iter->index = index; iter->next_index = maxindex + 1; iter->tags = 1; - __set_iter_shift(iter, shift); + __set_iter_shift(iter, 0); return (void **)&root->rnode; - } else - return NULL; - - shift -= RADIX_TREE_MAP_SHIFT; - offset = index >> shift; - - node = rnode; - while (1) { - struct radix_tree_node *slot; - unsigned new_off = radix_tree_descend(node, &slot, offset); + } - if (new_off < offset) { - offset = new_off; - index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); - index |= offset << shift; - } + do { + node = entry_to_node(child); + shift -= RADIX_TREE_MAP_SHIFT; + offset = (index >> shift) & RADIX_TREE_MAP_MASK; + offset = radix_tree_descend(node, &child, offset); if ((flags & RADIX_TREE_ITER_TAGGED) ? - !tag_get(node, tag, offset) : !slot) { + !tag_get(node, tag, offset) : !child) { /* Hole detected */ if (flags & RADIX_TREE_ITER_CONTIG) return NULL; @@ -945,29 +936,23 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, if (slot) break; } - index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1); + index &= ~node_maxindex(node); index += offset << shift; /* Overflow after ~0UL */ if (!index) return NULL; if (offset == RADIX_TREE_MAP_SIZE) goto restart; - slot = rcu_dereference_raw(node->slots[offset]); + child = rcu_dereference_raw(node->slots[offset]); } - if ((slot == NULL) || (slot == RADIX_TREE_RETRY)) + if ((child == NULL) || (child == RADIX_TREE_RETRY)) goto restart; - if (!radix_tree_is_internal_node(slot)) - break; - - node = entry_to_node(slot); - shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - } + } while (radix_tree_is_internal_node(child)); /* Update the iterator state */ - iter->index = index & ~((1 << shift) - 1); - iter->next_index = (index | ((RADIX_TREE_MAP_SIZE << shift) - 1)) + 1; + iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); + iter->next_index = (index | node_maxindex(node)) + 1; __set_iter_shift(iter, shift); /* Construct iter->tags bit-mask from node->tags[tag] array */ diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index c061f4bd6c05..39d9b9568fe2 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -202,7 +202,7 @@ void multiorder_iteration(void) RADIX_TREE(tree, GFP_KERNEL); struct radix_tree_iter iter; void **slot; - int i, err; + int i, j, err; printf("Multiorder iteration test\n"); @@ -215,29 +215,21 @@ void multiorder_iteration(void) assert(!err); } - i = 0; - /* start from index 1 to verify we find the multi-order entry at 0 */ - radix_tree_for_each_slot(slot, &tree, &iter, 1) { - int height = order[i] / RADIX_TREE_MAP_SHIFT; - int shift = height * RADIX_TREE_MAP_SHIFT; - - assert(iter.index == index[i]); - assert(iter.shift == shift); - i++; - } - - /* - * Now iterate through the tree starting at an elevated multi-order - * entry, beginning at an index in the middle of the range. - */ - i = 8; - radix_tree_for_each_slot(slot, &tree, &iter, 70) { - int height = order[i] / RADIX_TREE_MAP_SHIFT; - int shift = height * RADIX_TREE_MAP_SHIFT; - - assert(iter.index == index[i]); - assert(iter.shift == shift); - i++; + for (j = 0; j < 256; j++) { + for (i = 0; i < NUM_ENTRIES; i++) + if (j <= (index[i] | ((1 << order[i]) - 1))) + break; + + radix_tree_for_each_slot(slot, &tree, &iter, j) { + int height = order[i] / RADIX_TREE_MAP_SHIFT; + int shift = height * RADIX_TREE_MAP_SHIFT; + int mask = (1 << order[i]) - 1; + + assert(iter.index >= (index[i] &~ mask)); + assert(iter.index <= (index[i] | mask)); + assert(iter.shift == shift); + i++; + } } item_kill_tree(&tree); @@ -249,7 +241,7 @@ void multiorder_tagged_iteration(void) struct radix_tree_iter iter; void **slot; unsigned long first = 0; - int i; + int i, j; printf("Multiorder tagged iteration test\n"); @@ -268,30 +260,49 @@ void multiorder_tagged_iteration(void) for (i = 0; i < TAG_ENTRIES; i++) assert(radix_tree_tag_set(&tree, tag_index[i], 1)); - i = 0; - /* start from index 1 to verify we find the multi-order entry at 0 */ - radix_tree_for_each_tagged(slot, &tree, &iter, 1, 1) { - assert(iter.index == tag_index[i]); - i++; - } - - /* - * Now iterate through the tree starting at an elevated multi-order - * entry, beginning at an index in the middle of the range. - */ - i = 4; - radix_tree_for_each_slot(slot, &tree, &iter, 70) { - assert(iter.index == tag_index[i]); - i++; + for (j = 0; j < 256; j++) { + int mask, k; + + for (i = 0; i < TAG_ENTRIES; i++) { + for (k = i; index[k] < tag_index[i]; k++) + ; + if (j <= (index[k] | ((1 << order[k]) - 1))) + break; + } + + radix_tree_for_each_tagged(slot, &tree, &iter, j, 1) { + for (k = i; index[k] < tag_index[i]; k++) + ; + mask = (1 << order[k]) - 1; + + assert(iter.index >= (tag_index[i] &~ mask)); + assert(iter.index <= (tag_index[i] | mask)); + i++; + } } radix_tree_range_tag_if_tagged(&tree, &first, ~0UL, MT_NUM_ENTRIES, 1, 2); - i = 0; - radix_tree_for_each_tagged(slot, &tree, &iter, 1, 2) { - assert(iter.index == tag_index[i]); - i++; + for (j = 0; j < 256; j++) { + int mask, k; + + for (i = 0; i < TAG_ENTRIES; i++) { + for (k = i; index[k] < tag_index[i]; k++) + ; + if (j <= (index[k] | ((1 << order[k]) - 1))) + break; + } + + radix_tree_for_each_tagged(slot, &tree, &iter, j, 2) { + for (k = i; index[k] < tag_index[i]; k++) + ; + mask = (1 << order[k]) - 1; + + assert(iter.index >= (tag_index[i] &~ mask)); + assert(iter.index <= (tag_index[i] | mask)); + i++; + } } first = 1; -- cgit From ab68f26221366f92611650e8470e6a926801c7d4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2016 09:15:08 -0700 Subject: /dev/dax, pmem: direct access to persistent memory Device DAX is the device-centric analogue of Filesystem DAX (CONFIG_FS_DAX). It allows memory ranges to be allocated and mapped without need of an intervening file system. Device DAX is strict, precise and predictable. Specifically this interface: 1/ Guarantees fault granularity with respect to a given page size (pte, pmd, or pud) set at configuration time. 2/ Enforces deterministic behavior by being strict about what fault scenarios are supported. For example, by forcing MADV_DONTFORK semantics and omitting MAP_PRIVATE support device-dax guarantees that a mapping always behaves/performs the same once established. It is the "what you see is what you get" access mechanism to differentiated memory vs filesystem DAX which has filesystem specific implementation semantics. Persistent memory is the first target, but the mechanism is also targeted for exclusive allocations of performance differentiated memory ranges. This commit is limited to the base device driver infrastructure to associate a dax device with pmem range. Cc: Jeff Moyer Cc: Christoph Hellwig Cc: Andrew Morton Cc: Dave Hansen Cc: Ross Zwisler Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/dax/Kconfig | 25 ++++ drivers/dax/Makefile | 4 + drivers/dax/dax.c | 253 ++++++++++++++++++++++++++++++++++++ drivers/dax/dax.h | 24 ++++ drivers/dax/pmem.c | 158 ++++++++++++++++++++++ tools/testing/nvdimm/Kbuild | 9 ++ tools/testing/nvdimm/config_check.c | 2 + 9 files changed, 478 insertions(+) create mode 100644 drivers/dax/Kconfig create mode 100644 drivers/dax/Makefile create mode 100644 drivers/dax/dax.c create mode 100644 drivers/dax/dax.h create mode 100644 drivers/dax/pmem.c (limited to 'tools') diff --git a/drivers/Kconfig b/drivers/Kconfig index d2ac339de85f..8298eab84a6f 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -190,6 +190,8 @@ source "drivers/android/Kconfig" source "drivers/nvdimm/Kconfig" +source "drivers/dax/Kconfig" + source "drivers/nvmem/Kconfig" source "drivers/hwtracing/stm/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 8f5d076baeb0..0b6f3d60193d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_PARPORT) += parport/ obj-$(CONFIG_NVM) += lightnvm/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ +obj-$(CONFIG_DEV_DAX) += dax/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ obj-$(CONFIG_NUBUS) += nubus/ obj-y += macintosh/ diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig new file mode 100644 index 000000000000..86ffbaa891ad --- /dev/null +++ b/drivers/dax/Kconfig @@ -0,0 +1,25 @@ +menuconfig DEV_DAX + tristate "DAX: direct access to differentiated memory" + default m if NVDIMM_DAX + help + Support raw access to differentiated (persistence, bandwidth, + latency...) memory via an mmap(2) capable character + device. Platform firmware or a device driver may identify a + platform memory resource that is differentiated from the + baseline memory pool. Mappings of a /dev/daxX.Y device impose + restrictions that make the mapping behavior deterministic. + +if DEV_DAX + +config DEV_DAX_PMEM + tristate "PMEM DAX: direct access to persistent memory" + depends on NVDIMM_DAX + default DEV_DAX + help + Support raw access to persistent memory. Note that this + driver consumes memory ranges allocated and exported by the + libnvdimm sub-system. + + Say Y if unsure + +endif diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile new file mode 100644 index 000000000000..27c54e38478a --- /dev/null +++ b/drivers/dax/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_DEV_DAX) += dax.o +obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o + +dax_pmem-y := pmem.o diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c new file mode 100644 index 000000000000..4c22a40f2335 --- /dev/null +++ b/drivers/dax/dax.c @@ -0,0 +1,253 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +static int dax_major; +static struct class *dax_class; +static DEFINE_IDA(dax_minor_ida); + +/** + * struct dax_region - mapping infrastructure for dax devices + * @id: kernel-wide unique region for a memory range + * @base: linear address corresponding to @res + * @kref: to pin while other agents have a need to do lookups + * @dev: parent device backing this region + * @align: allocation and mapping alignment for child dax devices + * @res: physical address range of the region + * @pfn_flags: identify whether the pfns are paged back or not + */ +struct dax_region { + int id; + struct ida ida; + void *base; + struct kref kref; + struct device *dev; + unsigned int align; + struct resource res; + unsigned long pfn_flags; +}; + +/** + * struct dax_dev - subdivision of a dax region + * @region - parent region + * @dev - device backing the character device + * @kref - enable this data to be tracked in filp->private_data + * @id - child id in the region + * @num_resources - number of physical address extents in this device + * @res - array of physical address ranges + */ +struct dax_dev { + struct dax_region *region; + struct device *dev; + struct kref kref; + int id; + int num_resources; + struct resource res[0]; +}; + +static void dax_region_free(struct kref *kref) +{ + struct dax_region *dax_region; + + dax_region = container_of(kref, struct dax_region, kref); + kfree(dax_region); +} + +void dax_region_put(struct dax_region *dax_region) +{ + kref_put(&dax_region->kref, dax_region_free); +} +EXPORT_SYMBOL_GPL(dax_region_put); + +static void dax_dev_free(struct kref *kref) +{ + struct dax_dev *dax_dev; + + dax_dev = container_of(kref, struct dax_dev, kref); + dax_region_put(dax_dev->region); + kfree(dax_dev); +} + +static void dax_dev_put(struct dax_dev *dax_dev) +{ + kref_put(&dax_dev->kref, dax_dev_free); +} + +struct dax_region *alloc_dax_region(struct device *parent, int region_id, + struct resource *res, unsigned int align, void *addr, + unsigned long pfn_flags) +{ + struct dax_region *dax_region; + + dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL); + + if (!dax_region) + return NULL; + + memcpy(&dax_region->res, res, sizeof(*res)); + dax_region->pfn_flags = pfn_flags; + kref_init(&dax_region->kref); + dax_region->id = region_id; + ida_init(&dax_region->ida); + dax_region->align = align; + dax_region->dev = parent; + dax_region->base = addr; + + return dax_region; +} +EXPORT_SYMBOL_GPL(alloc_dax_region); + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dax_dev *dax_dev = dev_get_drvdata(dev); + unsigned long long size = 0; + int i; + + for (i = 0; i < dax_dev->num_resources; i++) + size += resource_size(&dax_dev->res[i]); + + return sprintf(buf, "%llu\n", size); +} +static DEVICE_ATTR_RO(size); + +static struct attribute *dax_device_attributes[] = { + &dev_attr_size.attr, + NULL, +}; + +static const struct attribute_group dax_device_attribute_group = { + .attrs = dax_device_attributes, +}; + +static const struct attribute_group *dax_attribute_groups[] = { + &dax_device_attribute_group, + NULL, +}; + +static void unregister_dax_dev(void *_dev) +{ + struct device *dev = _dev; + struct dax_dev *dax_dev = dev_get_drvdata(dev); + struct dax_region *dax_region = dax_dev->region; + + dev_dbg(dev, "%s\n", __func__); + + get_device(dev); + device_unregister(dev); + ida_simple_remove(&dax_region->ida, dax_dev->id); + ida_simple_remove(&dax_minor_ida, MINOR(dev->devt)); + put_device(dev); + dax_dev_put(dax_dev); +} + +int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res, + int count) +{ + struct device *parent = dax_region->dev; + struct dax_dev *dax_dev; + struct device *dev; + int rc, minor; + dev_t dev_t; + + dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL); + if (!dax_dev) + return -ENOMEM; + memcpy(dax_dev->res, res, sizeof(*res) * count); + dax_dev->num_resources = count; + kref_init(&dax_dev->kref); + dax_dev->region = dax_region; + kref_get(&dax_region->kref); + + dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL); + if (dax_dev->id < 0) { + rc = dax_dev->id; + goto err_id; + } + + minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL); + if (minor < 0) { + rc = minor; + goto err_minor; + } + + dev_t = MKDEV(dax_major, minor); + dev = device_create_with_groups(dax_class, parent, dev_t, dax_dev, + dax_attribute_groups, "dax%d.%d", dax_region->id, + dax_dev->id); + if (IS_ERR(dev)) { + rc = PTR_ERR(dev); + goto err_create; + } + dax_dev->dev = dev; + + rc = devm_add_action(dax_region->dev, unregister_dax_dev, dev); + if (rc) { + unregister_dax_dev(dev); + return rc; + } + + return 0; + + err_create: + ida_simple_remove(&dax_minor_ida, minor); + err_minor: + ida_simple_remove(&dax_region->ida, dax_dev->id); + err_id: + dax_dev_put(dax_dev); + + return rc; +} +EXPORT_SYMBOL_GPL(devm_create_dax_dev); + +static const struct file_operations dax_fops = { + .llseek = noop_llseek, + .owner = THIS_MODULE, +}; + +static int __init dax_init(void) +{ + int rc; + + rc = register_chrdev(0, "dax", &dax_fops); + if (rc < 0) + return rc; + dax_major = rc; + + dax_class = class_create(THIS_MODULE, "dax"); + if (IS_ERR(dax_class)) { + unregister_chrdev(dax_major, "dax"); + return PTR_ERR(dax_class); + } + + return 0; +} + +static void __exit dax_exit(void) +{ + class_destroy(dax_class); + unregister_chrdev(dax_major, "dax"); + ida_destroy(&dax_minor_ida); +} + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL v2"); +subsys_initcall(dax_init); +module_exit(dax_exit); diff --git a/drivers/dax/dax.h b/drivers/dax/dax.h new file mode 100644 index 000000000000..d8b8f1f25054 --- /dev/null +++ b/drivers/dax/dax.h @@ -0,0 +1,24 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __DAX_H__ +#define __DAX_H__ +struct device; +struct resource; +struct dax_region; +void dax_region_put(struct dax_region *dax_region); +struct dax_region *alloc_dax_region(struct device *parent, + int region_id, struct resource *res, unsigned int align, + void *addr, unsigned long flags); +int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res, + int count); +#endif /* __DAX_H__ */ diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c new file mode 100644 index 000000000000..55d510e36cd1 --- /dev/null +++ b/drivers/dax/pmem.c @@ -0,0 +1,158 @@ +/* + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include "../nvdimm/pfn.h" +#include "../nvdimm/nd.h" +#include "dax.h" + +struct dax_pmem { + struct device *dev; + struct percpu_ref ref; + struct completion cmp; +}; + +struct dax_pmem *to_dax_pmem(struct percpu_ref *ref) +{ + return container_of(ref, struct dax_pmem, ref); +} + +static void dax_pmem_percpu_release(struct percpu_ref *ref) +{ + struct dax_pmem *dax_pmem = to_dax_pmem(ref); + + dev_dbg(dax_pmem->dev, "%s\n", __func__); + complete(&dax_pmem->cmp); +} + +static void dax_pmem_percpu_exit(void *data) +{ + struct percpu_ref *ref = data; + struct dax_pmem *dax_pmem = to_dax_pmem(ref); + + dev_dbg(dax_pmem->dev, "%s\n", __func__); + percpu_ref_exit(ref); + wait_for_completion(&dax_pmem->cmp); +} + +static void dax_pmem_percpu_kill(void *data) +{ + struct percpu_ref *ref = data; + struct dax_pmem *dax_pmem = to_dax_pmem(ref); + + dev_dbg(dax_pmem->dev, "%s\n", __func__); + percpu_ref_kill(ref); +} + +static int dax_pmem_probe(struct device *dev) +{ + int rc; + void *addr; + struct resource res; + struct nd_pfn_sb *pfn_sb; + struct dax_pmem *dax_pmem; + struct nd_region *nd_region; + struct nd_namespace_io *nsio; + struct dax_region *dax_region; + struct nd_namespace_common *ndns; + struct nd_dax *nd_dax = to_nd_dax(dev); + struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; + struct vmem_altmap __altmap, *altmap = NULL; + + ndns = nvdimm_namespace_common_probe(dev); + if (IS_ERR(ndns)) + return PTR_ERR(ndns); + nsio = to_nd_namespace_io(&ndns->dev); + + /* parse the 'pfn' info block via ->rw_bytes */ + devm_nsio_enable(dev, nsio); + altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap); + if (IS_ERR(altmap)) + return PTR_ERR(altmap); + devm_nsio_disable(dev, nsio); + + pfn_sb = nd_pfn->pfn_sb; + + if (!devm_request_mem_region(dev, nsio->res.start, + resource_size(&nsio->res), dev_name(dev))) { + dev_warn(dev, "could not reserve region %pR\n", &nsio->res); + return -EBUSY; + } + + dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL); + if (!dax_pmem) + return -ENOMEM; + + dax_pmem->dev = dev; + init_completion(&dax_pmem->cmp); + rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0, + GFP_KERNEL); + if (rc) + return rc; + + rc = devm_add_action(dev, dax_pmem_percpu_exit, &dax_pmem->ref); + if (rc) { + dax_pmem_percpu_exit(&dax_pmem->ref); + return rc; + } + + addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap); + if (IS_ERR(addr)) + return PTR_ERR(addr); + + rc = devm_add_action(dev, dax_pmem_percpu_kill, &dax_pmem->ref); + if (rc) { + dax_pmem_percpu_kill(&dax_pmem->ref); + return rc; + } + + nd_region = to_nd_region(dev->parent); + dax_region = alloc_dax_region(dev, nd_region->id, &res, + le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP); + if (!dax_region) + return -ENOMEM; + + /* TODO: support for subdividing a dax region... */ + rc = devm_create_dax_dev(dax_region, &res, 1); + + /* child dax_dev instances now own the lifetime of the dax_region */ + dax_region_put(dax_region); + + return rc; +} + +static struct nd_device_driver dax_pmem_driver = { + .probe = dax_pmem_probe, + .drv = { + .name = "dax_pmem", + }, + .type = ND_DRIVER_DAX_PMEM, +}; + +static int __init dax_pmem_init(void) +{ + return nd_driver_register(&dax_pmem_driver); +} +module_init(dax_pmem_init); + +static void __exit dax_pmem_exit(void) +{ + driver_unregister(&dax_pmem_driver.drv); +} +module_exit(dax_pmem_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DAX_PMEM); diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 5ff6d3c126a9..785985677159 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -16,6 +16,7 @@ ldflags-y += --wrap=phys_to_pfn_t DRIVERS := ../../../drivers NVDIMM_SRC := $(DRIVERS)/nvdimm ACPI_SRC := $(DRIVERS)/acpi +DAX_SRC := $(DRIVERS)/dax obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o @@ -23,6 +24,8 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o obj-$(CONFIG_ACPI_NFIT) += nfit.o +obj-$(CONFIG_DEV_DAX) += dax.o +obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o nfit-y := $(ACPI_SRC)/nfit.o nfit-y += config_check.o @@ -39,6 +42,12 @@ nd_blk-y += config_check.o nd_e820-y := $(NVDIMM_SRC)/e820.o nd_e820-y += config_check.o +dax-y := $(DAX_SRC)/dax.o +dax-y += config_check.o + +dax_pmem-y := $(DAX_SRC)/pmem.o +dax_pmem-y += config_check.o + libnvdimm-y := $(NVDIMM_SRC)/core.o libnvdimm-y += $(NVDIMM_SRC)/bus.o libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c index f2c7615554eb..adf18bfeca00 100644 --- a/tools/testing/nvdimm/config_check.c +++ b/tools/testing/nvdimm/config_check.c @@ -12,4 +12,6 @@ void check(void) BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM)); } -- cgit From ce10c1b95095c652461616a3f887f6412ab6e1cc Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 8 May 2016 18:43:54 +0300 Subject: virtio: add inorder option skips ring accesses but drops out of order support Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/Makefile | 5 ++- tools/virtio/ringtest/virtio_ring_0_9.c | 49 ++++++++++++++++++++++++++++- tools/virtio/ringtest/virtio_ring_inorder.c | 2 ++ 3 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 tools/virtio/ringtest/virtio_ring_inorder.c (limited to 'tools') diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile index feaa64ac4630..6ba745529833 100644 --- a/tools/virtio/ringtest/Makefile +++ b/tools/virtio/ringtest/Makefile @@ -1,6 +1,6 @@ all: -all: ring virtio_ring_0_9 virtio_ring_poll +all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder CFLAGS += -Wall CFLAGS += -pthread -O2 -ggdb @@ -10,13 +10,16 @@ main.o: main.c main.h ring.o: ring.c main.h virtio_ring_0_9.o: virtio_ring_0_9.c main.h virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h +virtio_ring_inorder.o: virtio_ring_inorder.c virtio_ring_0_9.c main.h ring: ring.o main.o virtio_ring_0_9: virtio_ring_0_9.o main.o virtio_ring_poll: virtio_ring_poll.o main.o +virtio_ring_inorder: virtio_ring_inorder.o main.o clean: -rm main.o -rm ring.o ring -rm virtio_ring_0_9.o virtio_ring_0_9 -rm virtio_ring_poll.o virtio_ring_poll + -rm virtio_ring_inorder.o virtio_ring_inorder .PHONY: all clean diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c index 47c9a1a18d36..761866212aac 100644 --- a/tools/virtio/ringtest/virtio_ring_0_9.c +++ b/tools/virtio/ringtest/virtio_ring_0_9.c @@ -26,6 +26,14 @@ struct vring ring; * high bits of ring id ^ 0x8000). */ /* #ifdef RING_POLL */ +/* enabling the below activates experimental in-order code + * (which skips ring updates and reads and writes len in descriptor). + */ +/* #ifdef INORDER */ + +#if defined(RING_POLL) && defined(INORDER) +#error "RING_POLL and INORDER are mutually exclusive" +#endif /* how much padding is needed to avoid false cache sharing */ #define HOST_GUEST_PADDING 0x80 @@ -35,7 +43,11 @@ struct guest { unsigned short last_used_idx; unsigned short num_free; unsigned short kicked_avail_idx; +#ifndef INORDER unsigned short free_head; +#else + unsigned short reserved_free_head; +#endif unsigned char reserved[HOST_GUEST_PADDING - 10]; } guest; @@ -66,8 +78,10 @@ void alloc_ring(void) guest.avail_idx = 0; guest.kicked_avail_idx = -1; guest.last_used_idx = 0; +#ifndef INORDER /* Put everything in free lists. */ guest.free_head = 0; +#endif for (i = 0; i < ring_size - 1; i++) ring.desc[i].next = i + 1; host.used_idx = 0; @@ -84,13 +98,20 @@ void alloc_ring(void) /* guest side */ int add_inbuf(unsigned len, void *buf, void *datap) { - unsigned head, avail; + unsigned head; +#ifndef INORDER + unsigned avail; +#endif struct vring_desc *desc; if (!guest.num_free) return -1; +#ifdef INORDER + head = (ring_size - 1) & (guest.avail_idx++); +#else head = guest.free_head; +#endif guest.num_free--; desc = ring.desc; @@ -102,7 +123,9 @@ int add_inbuf(unsigned len, void *buf, void *datap) * descriptors. */ desc[head].flags &= ~VRING_DESC_F_NEXT; +#ifndef INORDER guest.free_head = desc[head].next; +#endif data[head].data = datap; @@ -113,8 +136,12 @@ int add_inbuf(unsigned len, void *buf, void *datap) ring.avail->ring[avail & (ring_size - 1)] = (head | (avail & ~(ring_size - 1))) ^ 0x8000; #else +#ifndef INORDER + /* Barrier A (for pairing) */ + smp_release(); avail = (ring_size - 1) & (guest.avail_idx++); ring.avail->ring[avail] = head; +#endif /* Barrier A (for pairing) */ smp_release(); #endif @@ -141,15 +168,27 @@ void *get_buf(unsigned *lenp, void **bufp) return NULL; /* Barrier B (for pairing) */ smp_acquire(); +#ifdef INORDER + head = (ring_size - 1) & guest.last_used_idx; + index = head; +#else head = (ring_size - 1) & guest.last_used_idx; index = ring.used->ring[head].id; #endif + +#endif +#ifdef INORDER + *lenp = ring.desc[index].len; +#else *lenp = ring.used->ring[head].len; +#endif datap = data[index].data; *bufp = (void*)(unsigned long)ring.desc[index].addr; data[index].data = NULL; +#ifndef INORDER ring.desc[index].next = guest.free_head; guest.free_head = index; +#endif guest.num_free++; guest.last_used_idx++; return datap; @@ -283,16 +322,24 @@ bool use_buf(unsigned *lenp, void **bufp) smp_acquire(); used_idx &= ring_size - 1; +#ifdef INORDER + head = used_idx; +#else head = ring.avail->ring[used_idx]; +#endif desc = &ring.desc[head]; #endif *lenp = desc->len; *bufp = (void *)(unsigned long)desc->addr; +#ifdef INORDER + desc->len = desc->len - 1; +#else /* now update used ring */ ring.used->ring[used_idx].id = head; ring.used->ring[used_idx].len = desc->len - 1; +#endif /* Barrier B (for pairing) */ smp_release(); host.used_idx++; diff --git a/tools/virtio/ringtest/virtio_ring_inorder.c b/tools/virtio/ringtest/virtio_ring_inorder.c new file mode 100644 index 000000000000..2438ca58a2ad --- /dev/null +++ b/tools/virtio/ringtest/virtio_ring_inorder.c @@ -0,0 +1,2 @@ +#define INORDER 1 +#include "virtio_ring_0_9.c" -- cgit From bb991288728e6a47a6f0fac6a4e9dfaeecc27956 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 22 May 2016 15:10:49 +0300 Subject: ringtest: pass buf != NULL just a stub pointer for now. Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c index 3a5ff438bd62..147abb452a6c 100644 --- a/tools/virtio/ringtest/main.c +++ b/tools/virtio/ringtest/main.c @@ -115,7 +115,7 @@ static void run_guest(void) do { if (started < bufs && started - completed < max_outstanding) { - r = add_inbuf(0, NULL, "Hello, world!"); + r = add_inbuf(0, "Buffer\n", "Hello, world!"); if (__builtin_expect(r == 0, true)) { ++started; if (!--tokick) { -- cgit From 64e2a42bca12e408f0258c56adcf3595bcd116e7 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 1 Apr 2016 22:40:53 +0200 Subject: parisc: Add ARCH_TRACEHOOK and regset support By adding TRACEHOOK support we now get a clean user interface to access registers via PTRACE_GETREGS, PTRACE_SETREGS, PTRACE_GETFPREGS and PTRACE_SETFPREGS. The user-visible regset struct user_regs_struct and user_fp_struct are modelled similiar to x86 and can be accessed via PTRACE_GETREGSET. Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 1 + arch/parisc/include/uapi/asm/ptrace.h | 48 ++++ arch/parisc/kernel/ptrace.c | 356 +++++++++++++++++++++++++- tools/testing/selftests/seccomp/seccomp_bpf.c | 8 +- 4 files changed, 410 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 9589511e9c95..6c68c23dd7c2 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -32,6 +32,7 @@ config PARISC select HAVE_DEBUG_STACKOVERFLOW select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK select ARCH_NO_COHERENT_DMA_MMAP select CPU_NO_EFFICIENT_FFS diff --git a/arch/parisc/include/uapi/asm/ptrace.h b/arch/parisc/include/uapi/asm/ptrace.h index c4fa6c8b9ad9..02ce2eb99a7f 100644 --- a/arch/parisc/include/uapi/asm/ptrace.h +++ b/arch/parisc/include/uapi/asm/ptrace.h @@ -13,6 +13,11 @@ * N.B. gdb/strace care about the size and offsets within this * structure. If you change things, you may break object compatibility * for those applications. + * + * Please do NOT use this structure for future programs, but use + * user_regs_struct (see below) instead. + * + * It can be accessed through PTRACE_PEEKUSR/PTRACE_POKEUSR only. */ struct pt_regs { @@ -33,6 +38,45 @@ struct pt_regs { unsigned long ipsw; /* CR22 */ }; +/** + * struct user_regs_struct - User general purpose registers + * + * This is the user-visible general purpose register state structure + * which is used to define the elf_gregset_t. + * + * It can be accessed through PTRACE_GETREGSET with NT_PRSTATUS + * and through PTRACE_GETREGS. + */ +struct user_regs_struct { + unsigned long gr[32]; /* PSW is in gr[0] */ + unsigned long sr[8]; + unsigned long iaoq[2]; + unsigned long iasq[2]; + unsigned long sar; /* CR11 */ + unsigned long iir; /* CR19 */ + unsigned long isr; /* CR20 */ + unsigned long ior; /* CR21 */ + unsigned long ipsw; /* CR22 */ + unsigned long cr0; + unsigned long cr24, cr25, cr26, cr27, cr28, cr29, cr30, cr31; + unsigned long cr8, cr9, cr12, cr13, cr10, cr15; + unsigned long _pad[80-64]; /* pad to ELF_NGREG (80) */ +}; + +/** + * struct user_fp_struct - User floating point registers + * + * This is the user-visible floating point register state structure. + * It uses the same layout and size as elf_fpregset_t. + * + * It can be accessed through PTRACE_GETREGSET with NT_PRFPREG + * and through PTRACE_GETFPREGS. + */ +struct user_fp_struct { + __u64 fr[32]; +}; + + /* * The numbers chosen here are somewhat arbitrary but absolutely MUST * not overlap with any of the number assigned in . @@ -43,5 +87,9 @@ struct pt_regs { */ #define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ +#define PTRACE_GETREGS 18 +#define PTRACE_SETREGS 19 +#define PTRACE_GETFPREGS 14 +#define PTRACE_SETFPREGS 15 #endif /* _UAPI_PARISC_PTRACE_H */ diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 4863761bdbbb..b5458b37fc5b 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -4,18 +4,20 @@ * Copyright (C) 2000 Hewlett-Packard Co, Linuxcare Inc. * Copyright (C) 2000 Matthew Wilcox * Copyright (C) 2000 David Huggins-Daines - * Copyright (C) 2008 Helge Deller + * Copyright (C) 2008-2016 Helge Deller */ #include #include #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -33,6 +35,14 @@ #define CREATE_TRACE_POINTS #include +/* + * These are our native regset flavors. + */ +enum parisc_regset { + REGSET_GENERAL, + REGSET_FP +}; + /* * Called by kernel/ptrace.c when detaching.. * @@ -117,6 +127,7 @@ void user_enable_block_step(struct task_struct *task) long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { + unsigned long __user *datap = (unsigned long __user *)data; unsigned long tmp; long ret = -EIO; @@ -129,7 +140,7 @@ long arch_ptrace(struct task_struct *child, long request, addr >= sizeof(struct pt_regs)) break; tmp = *(unsigned long *) ((char *) task_regs(child) + addr); - ret = put_user(tmp, (unsigned long __user *) data); + ret = put_user(tmp, datap); break; /* Write the word at location addr in the USER area. This will need @@ -168,6 +179,34 @@ long arch_ptrace(struct task_struct *child, long request, } break; + case PTRACE_GETREGS: /* Get all gp regs from the child. */ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct), + datap); + + case PTRACE_SETREGS: /* Set all gp regs in the child. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_GENERAL, + 0, sizeof(struct user_regs_struct), + datap); + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + return copy_regset_to_user(child, + task_user_regset_view(current), + REGSET_FP, + 0, sizeof(struct user_fp_struct), + datap); + + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + return copy_regset_from_user(child, + task_user_regset_view(current), + REGSET_FP, + 0, sizeof(struct user_fp_struct), + datap); + default: ret = ptrace_request(child, request, addr, data); break; @@ -326,3 +365,316 @@ void do_syscall_trace_exit(struct pt_regs *regs) if (stepping || test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, stepping); } + + +/* + * regset functions. + */ + +static int fpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_regs(target); + __u64 *k = kbuf; + __u64 __user *u = ubuf; + __u64 reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < ELF_NFPREG; --count) + *k++ = regs->fr[pos++]; + else + for (; count > 0 && pos < ELF_NFPREG; --count) + if (__put_user(regs->fr[pos++], u++)) + return -EFAULT; + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + ELF_NFPREG * sizeof(reg), -1); +} + +static int fpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_regs(target); + const __u64 *k = kbuf; + const __u64 __user *u = ubuf; + __u64 reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < ELF_NFPREG; --count) + regs->fr[pos++] = *k++; + else + for (; count > 0 && pos < ELF_NFPREG; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + regs->fr[pos++] = reg; + } + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + ELF_NFPREG * sizeof(reg), -1); +} + +#define RI(reg) (offsetof(struct user_regs_struct,reg) / sizeof(long)) + +static unsigned long get_reg(struct pt_regs *regs, int num) +{ + switch (num) { + case RI(gr[0]) ... RI(gr[31]): return regs->gr[num - RI(gr[0])]; + case RI(sr[0]) ... RI(sr[7]): return regs->sr[num - RI(sr[0])]; + case RI(iasq[0]): return regs->iasq[0]; + case RI(iasq[1]): return regs->iasq[1]; + case RI(iaoq[0]): return regs->iaoq[0]; + case RI(iaoq[1]): return regs->iaoq[1]; + case RI(sar): return regs->sar; + case RI(iir): return regs->iir; + case RI(isr): return regs->isr; + case RI(ior): return regs->ior; + case RI(ipsw): return regs->ipsw; + case RI(cr27): return regs->cr27; + case RI(cr0): return mfctl(0); + case RI(cr24): return mfctl(24); + case RI(cr25): return mfctl(25); + case RI(cr26): return mfctl(26); + case RI(cr28): return mfctl(28); + case RI(cr29): return mfctl(29); + case RI(cr30): return mfctl(30); + case RI(cr31): return mfctl(31); + case RI(cr8): return mfctl(8); + case RI(cr9): return mfctl(9); + case RI(cr12): return mfctl(12); + case RI(cr13): return mfctl(13); + case RI(cr10): return mfctl(10); + case RI(cr15): return mfctl(15); + default: return 0; + } +} + +static void set_reg(struct pt_regs *regs, int num, unsigned long val) +{ + switch (num) { + case RI(gr[0]): /* + * PSW is in gr[0]. + * Allow writing to Nullify, Divide-step-correction, + * and carry/borrow bits. + * BEWARE, if you set N, and then single step, it won't + * stop on the nullified instruction. + */ + val &= USER_PSW_BITS; + regs->gr[0] &= ~USER_PSW_BITS; + regs->gr[0] |= val; + return; + case RI(gr[1]) ... RI(gr[31]): + regs->gr[num - RI(gr[0])] = val; + return; + case RI(iaoq[0]): + case RI(iaoq[1]): + regs->iaoq[num - RI(iaoq[0])] = val; + return; + case RI(sar): regs->sar = val; + return; + default: return; +#if 0 + /* do not allow to change any of the following registers (yet) */ + case RI(sr[0]) ... RI(sr[7]): return regs->sr[num - RI(sr[0])]; + case RI(iasq[0]): return regs->iasq[0]; + case RI(iasq[1]): return regs->iasq[1]; + case RI(iir): return regs->iir; + case RI(isr): return regs->isr; + case RI(ior): return regs->ior; + case RI(ipsw): return regs->ipsw; + case RI(cr27): return regs->cr27; + case cr0, cr24, cr25, cr26, cr27, cr28, cr29, cr30, cr31; + case cr8, cr9, cr12, cr13, cr10, cr15; +#endif + } +} + +static int gpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_regs(target); + unsigned long *k = kbuf; + unsigned long __user *u = ubuf; + unsigned long reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < ELF_NGREG; --count) + *k++ = get_reg(regs, pos++); + else + for (; count > 0 && pos < ELF_NGREG; --count) + if (__put_user(get_reg(regs, pos++), u++)) + return -EFAULT; + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + ELF_NGREG * sizeof(reg), -1); +} + +static int gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_regs(target); + const unsigned long *k = kbuf; + const unsigned long __user *u = ubuf; + unsigned long reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < ELF_NGREG; --count) + set_reg(regs, pos++, *k++); + else + for (; count > 0 && pos < ELF_NGREG; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + set_reg(regs, pos++, reg); + } + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + ELF_NGREG * sizeof(reg), -1); +} + +static const struct user_regset native_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + .size = sizeof(long), .align = sizeof(long), + .get = gpr_get, .set = gpr_set + }, + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + .size = sizeof(__u64), .align = sizeof(__u64), + .get = fpr_get, .set = fpr_set + } +}; + +static const struct user_regset_view user_parisc_native_view = { + .name = "parisc", .e_machine = ELF_ARCH, .ei_osabi = ELFOSABI_LINUX, + .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) +}; + +#ifdef CONFIG_64BIT +#include + +static int gpr32_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_regs(target); + compat_ulong_t *k = kbuf; + compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < ELF_NGREG; --count) + *k++ = get_reg(regs, pos++); + else + for (; count > 0 && pos < ELF_NGREG; --count) + if (__put_user((compat_ulong_t) get_reg(regs, pos++), u++)) + return -EFAULT; + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + ELF_NGREG * sizeof(reg), -1); +} + +static int gpr32_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs = task_regs(target); + const compat_ulong_t *k = kbuf; + const compat_ulong_t __user *u = ubuf; + compat_ulong_t reg; + + pos /= sizeof(reg); + count /= sizeof(reg); + + if (kbuf) + for (; count > 0 && pos < ELF_NGREG; --count) + set_reg(regs, pos++, *k++); + else + for (; count > 0 && pos < ELF_NGREG; --count) { + if (__get_user(reg, u++)) + return -EFAULT; + set_reg(regs, pos++, reg); + } + + kbuf = k; + ubuf = u; + pos *= sizeof(reg); + count *= sizeof(reg); + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + ELF_NGREG * sizeof(reg), -1); +} + +/* + * These are the regset flavors matching the 32bit native set. + */ +static const struct user_regset compat_regsets[] = { + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, .n = ELF_NGREG, + .size = sizeof(compat_long_t), .align = sizeof(compat_long_t), + .get = gpr32_get, .set = gpr32_set + }, + [REGSET_FP] = { + .core_note_type = NT_PRFPREG, .n = ELF_NFPREG, + .size = sizeof(__u64), .align = sizeof(__u64), + .get = fpr_get, .set = fpr_set + } +}; + +static const struct user_regset_view user_parisc_compat_view = { + .name = "parisc", .e_machine = EM_PARISC, .ei_osabi = ELFOSABI_LINUX, + .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets) +}; +#endif /* CONFIG_64BIT */ + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + BUILD_BUG_ON(sizeof(struct user_regs_struct)/sizeof(long) != ELF_NGREG); + BUILD_BUG_ON(sizeof(struct user_fp_struct)/sizeof(__u64) != ELF_NFPREG); +#ifdef CONFIG_64BIT + if (is_compat_task()) + return &user_parisc_compat_view; +#endif + return &user_parisc_native_view; +} diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7947e568e057..2e58549b2f02 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1234,6 +1234,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS struct user_pt_regs # define SYSCALL_NUM regs[8] # define SYSCALL_RET regs[0] +#elif defined(__hppa__) +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM gr[20] +# define SYSCALL_RET gr[28] #elif defined(__powerpc__) # define ARCH_REGS struct pt_regs # define SYSCALL_NUM gpr[0] @@ -1303,7 +1307,7 @@ void change_syscall(struct __test_metadata *_metadata, EXPECT_EQ(0, ret); #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ - defined(__s390__) + defined(__s390__) || defined(__hppa__) { regs.SYSCALL_NUM = syscall; } @@ -1505,6 +1509,8 @@ TEST_F(TRACE_syscall, syscall_dropped) # define __NR_seccomp 383 # elif defined(__aarch64__) # define __NR_seccomp 277 +# elif defined(__hppa__) +# define __NR_seccomp 338 # elif defined(__powerpc__) # define __NR_seccomp 358 # elif defined(__s390__) -- cgit From a4351cb5511b917556c796d97068318a53a00849 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 31 Mar 2016 13:11:05 +0200 Subject: selftests/thuge-gen: Use platform specific MAP_HUGETLB value Do not hardcode MAP_HUGETLB to 0x40000, since quite some architectures use a different value. Tested with a parisc architecture 64bit kernel. Signed-off-by: Helge Deller --- tools/testing/selftests/vm/thuge-gen.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/vm/thuge-gen.c index c87957295f74..0bc737a75150 100644 --- a/tools/testing/selftests/vm/thuge-gen.c +++ b/tools/testing/selftests/vm/thuge-gen.c @@ -30,7 +30,9 @@ #define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) #define MAP_HUGE_SHIFT 26 #define MAP_HUGE_MASK 0x3f +#if !defined(MAP_HUGETLB) #define MAP_HUGETLB 0x40000 +#endif #define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ #define SHM_HUGE_SHIFT 26 -- cgit From 97f8827a8c7963756ae7d3ee898675b4667eca73 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 23 May 2016 10:04:46 -0400 Subject: ftracetest: Use proper logic to find process PID Half of the test in instance-event.tc was updated to use $! to find the PID of the previous background process that was launched, but the second part of the test still used the parsing of "jobs", which does not work on all shells like $! does. Signed-off-by: Steven Rostedt --- .../selftests/ftrace/test.d/instances/instance-event.tc | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc index 5f2abd03f16b..4c5a061a5b4e 100644 --- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc +++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc @@ -92,28 +92,23 @@ instance_slam() { } instance_slam & -x=`jobs -l` -p1=`echo $x | cut -d' ' -f2` +p1=$! echo $p1 instance_slam & -x=`jobs -l | tail -1` -p2=`echo $x | cut -d' ' -f2` +p2=$! echo $p2 instance_slam & -x=`jobs -l | tail -1` -p3=`echo $x | cut -d' ' -f2` +p3=$! echo $p3 instance_slam & -x=`jobs -l | tail -1` -p4=`echo $x | cut -d' ' -f2` +p4=$! echo $p4 instance_slam & -x=`jobs -l | tail -1` -p5=`echo $x | cut -d' ' -f2` +p5=$! echo $p5 ls -lR >/dev/null -- cgit From 508be0dfe6287d4e6452f5a1dc08856df74cb217 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 May 2016 13:15:08 -0700 Subject: perf report: Add srcline_from/to branch sort keys Add "srcline_from" and "srcline_to" branch sort keys that allow to show the source lines of a branch. That makes it much easier to track down where particular branches happen in the program, for example to examine branch mispredictions, or to associate it with cycle counts: % perf record -b -e cycles:p ./tcall % perf report --sort srcline_from,srcline_to,mispredict ... 15.10% tcall.c:18 tcall.c:10 N 14.83% tcall.c:11 tcall.c:5 N 14.12% tcall.c:7 tcall.c:12 N 14.04% tcall.c:12 tcall.c:5 N 12.42% tcall.c:17 tcall.c:18 N 12.39% tcall.c:7 tcall.c:13 N 12.27% tcall.c:13 tcall.c:17 N ... % perf report --sort srcline_from,srcline_to,cycles ... 17.12% tcall.c:18 tcall.c:11 1 17.01% tcall.c:12 tcall.c:6 1 16.98% tcall.c:11 tcall.c:6 1 15.91% tcall.c:17 tcall.c:18 1 6.38% tcall.c:7 tcall.c:17 7 4.80% tcall.c:7 tcall.c:12 8 4.21% tcall.c:7 tcall.c:17 8 2.67% tcall.c:7 tcall.c:12 7 2.62% tcall.c:7 tcall.c:12 10 2.10% tcall.c:7 tcall.c:17 9 1.58% tcall.c:7 tcall.c:12 6 1.44% tcall.c:7 tcall.c:12 5 1.38% tcall.c:7 tcall.c:12 9 1.06% tcall.c:7 tcall.c:17 13 1.05% tcall.c:7 tcall.c:12 4 1.01% tcall.c:7 tcall.c:17 6 Open issues: - Some kernel symbols get misresolved. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/1463775308-32748-1-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 3 +- tools/perf/util/hist.c | 9 ++++ tools/perf/util/hist.h | 2 + tools/perf/util/sort.c | 84 ++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 2 + tools/perf/util/symbol.h | 2 + 6 files changed, 101 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 496d42cdf02b..9cbddc290aff 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -103,12 +103,13 @@ OPTIONS If --branch-stack option is used, following sort keys are also available: - dso_from, dso_to, symbol_from, symbol_to, mispredict. - dso_from: name of library or module branched from - dso_to: name of library or module branched to - symbol_from: name of function branched from - symbol_to: name of function branched to + - srcline_from: source file and line branched from + - srcline_to: source file and line branched to - mispredict: "N" for predicted branch, "Y" for mispredicted branch - in_tx: branch in TSX transaction - abort: TSX transaction abort. diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index cfab531437c7..d1f19e0012d4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -117,6 +117,13 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); } + + if (h->branch_info->srcline_from) + hists__new_col_len(hists, HISTC_SRCLINE_FROM, + strlen(h->branch_info->srcline_from)); + if (h->branch_info->srcline_to) + hists__new_col_len(hists, HISTC_SRCLINE_TO, + strlen(h->branch_info->srcline_to)); } if (h->mem_info) { @@ -1042,6 +1049,8 @@ void hist_entry__delete(struct hist_entry *he) if (he->branch_info) { map__zput(he->branch_info->from.map); map__zput(he->branch_info->to.map); + free_srcline(he->branch_info->srcline_from); + free_srcline(he->branch_info->srcline_to); zfree(&he->branch_info); } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 0f84bfb42bb1..7b54ccf1b737 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -52,6 +52,8 @@ enum hist_column { HISTC_MEM_IADDR_SYMBOL, HISTC_TRANSACTION, HISTC_CYCLES, + HISTC_SRCLINE_FROM, + HISTC_SRCLINE_TO, HISTC_TRACE, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 20e69edd5006..c4e9bd70723c 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -353,6 +353,88 @@ struct sort_entry sort_srcline = { .se_width_idx = HISTC_SRCLINE, }; +/* --sort srcline_from */ + +static int64_t +sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) +{ + if (!left->branch_info->srcline_from) { + struct map *map = left->branch_info->from.map; + if (!map) + left->branch_info->srcline_from = SRCLINE_UNKNOWN; + else + left->branch_info->srcline_from = get_srcline(map->dso, + map__rip_2objdump(map, + left->branch_info->from.al_addr), + left->branch_info->from.sym, true); + } + if (!right->branch_info->srcline_from) { + struct map *map = right->branch_info->from.map; + if (!map) + right->branch_info->srcline_from = SRCLINE_UNKNOWN; + else + right->branch_info->srcline_from = get_srcline(map->dso, + map__rip_2objdump(map, + right->branch_info->from.al_addr), + right->branch_info->from.sym, true); + } + return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); +} + +static int hist_entry__srcline_from_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*.*s", width, width, he->branch_info->srcline_from); +} + +struct sort_entry sort_srcline_from = { + .se_header = "From Source:Line", + .se_cmp = sort__srcline_from_cmp, + .se_snprintf = hist_entry__srcline_from_snprintf, + .se_width_idx = HISTC_SRCLINE_FROM, +}; + +/* --sort srcline_to */ + +static int64_t +sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) +{ + if (!left->branch_info->srcline_to) { + struct map *map = left->branch_info->to.map; + if (!map) + left->branch_info->srcline_to = SRCLINE_UNKNOWN; + else + left->branch_info->srcline_to = get_srcline(map->dso, + map__rip_2objdump(map, + left->branch_info->to.al_addr), + left->branch_info->from.sym, true); + } + if (!right->branch_info->srcline_to) { + struct map *map = right->branch_info->to.map; + if (!map) + right->branch_info->srcline_to = SRCLINE_UNKNOWN; + else + right->branch_info->srcline_to = get_srcline(map->dso, + map__rip_2objdump(map, + right->branch_info->to.al_addr), + right->branch_info->to.sym, true); + } + return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); +} + +static int hist_entry__srcline_to_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*.*s", width, width, he->branch_info->srcline_to); +} + +struct sort_entry sort_srcline_to = { + .se_header = "To Source:Line", + .se_cmp = sort__srcline_to_cmp, + .se_snprintf = hist_entry__srcline_to_snprintf, + .se_width_idx = HISTC_SRCLINE_TO, +}; + /* --sort srcfile */ static char no_srcfile[1]; @@ -1347,6 +1429,8 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_IN_TX, "in_tx", sort_in_tx), DIM(SORT_ABORT, "abort", sort_abort), DIM(SORT_CYCLES, "cycles", sort_cycles), + DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from), + DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 42927f448bcb..ebb59cacd092 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -215,6 +215,8 @@ enum sort_type { SORT_ABORT, SORT_IN_TX, SORT_CYCLES, + SORT_SRCLINE_FROM, + SORT_SRCLINE_TO, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index fa415347dbf9..b10d558a8803 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -186,6 +186,8 @@ struct branch_info { struct addr_map_symbol from; struct addr_map_symbol to; struct branch_flags flags; + char *srcline_from; + char *srcline_to; }; struct mem_info { -- cgit From b6565c908ad7eb28dfdda9578ec5a074e080cedc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 23 May 2016 12:59:53 -0300 Subject: perf trace: Use the fd->name beautifier as default for "fd" args Noticed when the 'setsockopt' 'fd' arg wasn't being formatted via the SCA_FD beautifier, so just remove the setting of "fd" args to SCA_FD and do it when reading the syscall info, like we do for args of type "pid_t", i.e. "fd" as the name should be enough as the decision to use the SFA_FD beautifier. For odd cases we can just do it explicitely. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-0qissgetiuqmqyj4b6ancmpn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 132 +++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 78 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 487c10401d46..181f69cd6a8e 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -595,65 +595,45 @@ static struct syscall_fmt { { .name = "connect", .errmsg = true, }, { .name = "creat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "dup", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "dup2", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "dup3", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "dup", .errmsg = true, }, + { .name = "dup2", .errmsg = true, }, + { .name = "dup3", .errmsg = true, }, { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, { .name = "eventfd2", .errmsg = true, .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, { .name = "faccessat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [1] = SCA_FILENAME, /* filename */ }, }, - { .name = "fadvise64", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "fallocate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "fchdir", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "fchmod", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [1] = SCA_FILENAME, /* filename */ }, }, + { .name = "fadvise64", .errmsg = true, }, + { .name = "fallocate", .errmsg = true, }, + { .name = "fchdir", .errmsg = true, }, + { .name = "fchmod", .errmsg = true, }, { .name = "fchmodat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ [1] = SCA_FILENAME, /* filename */ }, }, - { .name = "fchown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "fchown", .errmsg = true, }, { .name = "fchownat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ [1] = SCA_FILENAME, /* filename */ }, }, { .name = "fcntl", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [1] = SCA_STRARRAY, /* cmd */ }, + .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ }, .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, - { .name = "fdatasync", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "fdatasync", .errmsg = true, }, { .name = "flock", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [1] = SCA_FLOCK, /* cmd */ }, }, - { .name = "fsetxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "fstat", .errmsg = true, .alias = "newfstat", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, }, + { .name = "fsetxattr", .errmsg = true, }, + { .name = "fstat", .errmsg = true, .alias = "newfstat", }, { .name = "fstatat", .errmsg = true, .alias = "newfstatat", - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [1] = SCA_FILENAME, /* filename */ }, }, - { .name = "fstatfs", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "fsync", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "ftruncate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [1] = SCA_FILENAME, /* filename */ }, }, + { .name = "fstatfs", .errmsg = true, }, + { .name = "fsync", .errmsg = true, }, + { .name = "ftruncate", .errmsg = true, }, { .name = "futex", .errmsg = true, .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, { .name = "futimesat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ [1] = SCA_FILENAME, /* filename */ }, }, - { .name = "getdents", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "getdents64", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "getdents", .errmsg = true, }, + { .name = "getdents64", .errmsg = true, }, { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "getpid", .errpid = true, }, { .name = "getpgid", .errpid = true, }, @@ -666,7 +646,7 @@ static struct syscall_fmt { { .name = "inotify_add_watch", .errmsg = true, .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, }, { .name = "ioctl", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ + .arg_scnprintf = { #if defined(__i386__) || defined(__x86_64__) /* * FIXME: Make this available to all arches. @@ -693,8 +673,7 @@ static struct syscall_fmt { { .name = "lremovexattr", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "lseek", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [2] = SCA_STRARRAY, /* whence */ }, + .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ }, .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, { .name = "lsetxattr", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, @@ -722,8 +701,7 @@ static struct syscall_fmt { { .name = "mmap", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ [2] = SCA_MMAP_PROT, /* prot */ - [3] = SCA_MMAP_FLAGS, /* flags */ - [4] = SCA_FD, /* fd */ }, }, + [3] = SCA_MMAP_FLAGS, /* flags */ }, }, { .name = "mprotect", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MMAP_PROT, /* prot */ }, }, @@ -760,33 +738,24 @@ static struct syscall_fmt { .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, { .name = "poll", .errmsg = true, .timeout = true, }, { .name = "ppoll", .errmsg = true, .timeout = true, }, - { .name = "pread", .errmsg = true, .alias = "pread64", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "preadv", .errmsg = true, .alias = "pread", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "pread", .errmsg = true, .alias = "pread64", }, + { .name = "preadv", .errmsg = true, .alias = "pread", }, { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, - { .name = "pwrite", .errmsg = true, .alias = "pwrite64", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "pwritev", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "read", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, + { .name = "pwritev", .errmsg = true, }, + { .name = "read", .errmsg = true, }, { .name = "readlink", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, { .name = "readlinkat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ [1] = SCA_FILENAME, /* pathname */ }, }, - { .name = "readv", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "readv", .errmsg = true, }, { .name = "recvfrom", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmmsg", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmsg", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [2] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "removexattr", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, { .name = "renameat", .errmsg = true, @@ -807,22 +776,18 @@ static struct syscall_fmt { [1] = SCA_SECCOMP_FLAGS, /* flags */ }, }, { .name = "select", .errmsg = true, .timeout = true, }, { .name = "sendmmsg", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "sendmsg", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [2] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "sendto", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ - [3] = SCA_MSG_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "set_tid_address", .errpid = true, }, { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "setpgid", .errmsg = true, }, { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "setxattr", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "shutdown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "shutdown", .errmsg = true, }, { .name = "socket", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, @@ -858,16 +823,13 @@ static struct syscall_fmt { [1] = SCA_FILENAME, /* filename */ }, }, { .name = "utimes", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, - { .name = "vmsplice", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "vmsplice", .errmsg = true, }, { .name = "wait4", .errpid = true, .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, }, { .name = "waitid", .errpid = true, .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, }, - { .name = "write", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, - { .name = "writev", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + { .name = "write", .errmsg = true, }, + { .name = "writev", .errmsg = true, }, }; static int syscall_fmt__cmp(const void *name, const void *fmtp) @@ -1204,7 +1166,7 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) static int syscall__set_arg_fmts(struct syscall *sc) { struct format_field *field; - int idx = 0; + int idx = 0, len; sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *)); if (sc->arg_scnprintf == NULL) @@ -1222,6 +1184,20 @@ static int syscall__set_arg_fmts(struct syscall *sc) sc->arg_scnprintf[idx] = SCA_PID; else if (strcmp(field->type, "umode_t") == 0) sc->arg_scnprintf[idx] = SCA_MODE_T; + else if ((strcmp(field->type, "int") == 0 || + strcmp(field->type, "unsigned int") == 0 || + strcmp(field->type, "long") == 0) && + (len = strlen(field->name)) >= 2 && + strcmp(field->name + len - 2, "fd") == 0) { + /* + * /sys/kernel/tracing/events/syscalls/sys_enter* + * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c + * 65 int + * 23 unsigned int + * 7 unsigned long + */ + sc->arg_scnprintf[idx] = SCA_FD; + } ++idx; } -- cgit From 12f3ca4fc8e27aa602c9c3c717d755b1e8f7fd47 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 23 May 2016 16:37:55 -0300 Subject: perf trace: Use the ptr->name beautifier as default for "filename" args Auto-attach the ptr->name beautifier to syscall args "filename", "path" and "pathname" if they are of type "const char *". Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-jxii4qmcgoppftv0zdvml9d7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 120 ++++++++++++++++----------------------------- 1 file changed, 43 insertions(+), 77 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 181f69cd6a8e..5c50fe70d6b3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -576,44 +576,36 @@ static struct syscall_fmt { bool hexret; } syscall_fmts[] = { { .name = "access", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ - [1] = SCA_ACCMODE, /* mode */ }, }, + .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), }, { .name = "brk", .hexret = true, .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, - { .name = "chdir", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, - { .name = "chmod", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, - { .name = "chroot", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + { .name = "chdir", .errmsg = true, }, + { .name = "chmod", .errmsg = true, }, + { .name = "chroot", .errmsg = true, }, { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, { .name = "clone", .errpid = true, }, { .name = "close", .errmsg = true, .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, { .name = "connect", .errmsg = true, }, - { .name = "creat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "creat", .errmsg = true, }, { .name = "dup", .errmsg = true, }, { .name = "dup2", .errmsg = true, }, { .name = "dup3", .errmsg = true, }, { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, { .name = "eventfd2", .errmsg = true, .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, - { .name = "faccessat", .errmsg = true, - .arg_scnprintf = { [1] = SCA_FILENAME, /* filename */ }, }, + { .name = "faccessat", .errmsg = true, }, { .name = "fadvise64", .errmsg = true, }, { .name = "fallocate", .errmsg = true, }, { .name = "fchdir", .errmsg = true, }, { .name = "fchmod", .errmsg = true, }, { .name = "fchmodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ - [1] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fchown", .errmsg = true, }, { .name = "fchownat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ - [1] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fcntl", .errmsg = true, .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ }, .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, @@ -622,16 +614,14 @@ static struct syscall_fmt { .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, }, { .name = "fsetxattr", .errmsg = true, }, { .name = "fstat", .errmsg = true, .alias = "newfstat", }, - { .name = "fstatat", .errmsg = true, .alias = "newfstatat", - .arg_scnprintf = { [1] = SCA_FILENAME, /* filename */ }, }, + { .name = "fstatat", .errmsg = true, .alias = "newfstatat", }, { .name = "fstatfs", .errmsg = true, }, { .name = "fsync", .errmsg = true, }, { .name = "ftruncate", .errmsg = true, }, { .name = "futex", .errmsg = true, .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, { .name = "futimesat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ - [1] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "getdents", .errmsg = true, }, { .name = "getdents64", .errmsg = true, }, { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, @@ -641,10 +631,8 @@ static struct syscall_fmt { { .name = "getrandom", .errmsg = true, .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, }, { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, - { .name = "getxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "inotify_add_watch", .errmsg = true, - .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, }, + { .name = "getxattr", .errmsg = true, }, + { .name = "inotify_add_watch", .errmsg = true, }, { .name = "ioctl", .errmsg = true, .arg_scnprintf = { #if defined(__i386__) || defined(__x86_64__) @@ -660,40 +648,28 @@ static struct syscall_fmt { { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), }, { .name = "kill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, - { .name = "lchown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, - { .name = "lgetxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "lchown", .errmsg = true, }, + { .name = "lgetxattr", .errmsg = true, }, { .name = "linkat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, - { .name = "listxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "llistxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "lremovexattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "listxattr", .errmsg = true, }, + { .name = "llistxattr", .errmsg = true, }, + { .name = "lremovexattr", .errmsg = true, }, { .name = "lseek", .errmsg = true, .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ }, .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, - { .name = "lsetxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "lstat", .errmsg = true, .alias = "newlstat", - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, - { .name = "lsxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "lsetxattr", .errmsg = true, }, + { .name = "lstat", .errmsg = true, .alias = "newlstat", }, + { .name = "lsxattr", .errmsg = true, }, { .name = "madvise", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MADV_BHV, /* behavior */ }, }, - { .name = "mkdir", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "mkdir", .errmsg = true, }, { .name = "mkdirat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ - [1] = SCA_FILENAME, /* pathname */ }, }, - { .name = "mknod", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + { .name = "mknod", .errmsg = true, }, { .name = "mknodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ - [1] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "mlock", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mlockall", .errmsg = true, @@ -718,17 +694,14 @@ static struct syscall_fmt { { .name = "name_to_handle_at", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "newfstatat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [1] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "open", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ - [1] = SCA_OPEN_FLAGS, /* flags */ }, }, + .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "open_by_handle_at", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ [2] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "openat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [1] = SCA_FILENAME, /* filename */ [2] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "perf_event_open", .errmsg = true, .arg_scnprintf = { [2] = SCA_INT, /* cpu */ @@ -744,11 +717,9 @@ static struct syscall_fmt { { .name = "pwrite", .errmsg = true, .alias = "pwrite64", }, { .name = "pwritev", .errmsg = true, }, { .name = "read", .errmsg = true, }, - { .name = "readlink", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, + { .name = "readlink", .errmsg = true, }, { .name = "readlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [1] = SCA_FILENAME, /* pathname */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "readv", .errmsg = true, }, { .name = "recvfrom", .errmsg = true, .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, @@ -756,12 +727,10 @@ static struct syscall_fmt { .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmsg", .errmsg = true, .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, - { .name = "removexattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "removexattr", .errmsg = true, }, { .name = "renameat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, - { .name = "rmdir", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "rmdir", .errmsg = true, }, { .name = "rt_sigaction", .errmsg = true, .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, @@ -785,8 +754,7 @@ static struct syscall_fmt { { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "setpgid", .errmsg = true, }, { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, - { .name = "setxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "setxattr", .errmsg = true, }, { .name = "shutdown", .errmsg = true, }, { .name = "socket", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ @@ -796,10 +764,8 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, - { .name = "stat", .errmsg = true, .alias = "newstat", - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, - { .name = "statfs", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, + { .name = "stat", .errmsg = true, .alias = "newstat", }, + { .name = "statfs", .errmsg = true, }, { .name = "swapoff", .errmsg = true, .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, { .name = "swapon", .errmsg = true, @@ -810,19 +776,14 @@ static struct syscall_fmt { .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "tkill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, - { .name = "truncate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, + { .name = "truncate", .errmsg = true, }, { .name = "uname", .errmsg = true, .alias = "newuname", }, { .name = "unlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ - [1] = SCA_FILENAME, /* pathname */ }, }, - { .name = "utime", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + { .name = "utime", .errmsg = true, }, { .name = "utimensat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ - [1] = SCA_FILENAME, /* filename */ }, }, - { .name = "utimes", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, + { .name = "utimes", .errmsg = true, }, { .name = "vmsplice", .errmsg = true, }, { .name = "wait4", .errpid = true, .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, }, @@ -1178,6 +1139,11 @@ static int syscall__set_arg_fmts(struct syscall *sc) for (field = sc->args; field; field = field->next) { if (sc->fmt && sc->fmt->arg_scnprintf[idx]) sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; + else if (strcmp(field->type, "const char *") == 0 && + (strcmp(field->name, "filename") == 0 || + strcmp(field->name, "path") == 0 || + strcmp(field->name, "pathname") == 0)) + sc->arg_scnprintf[idx] = SCA_FILENAME; else if (field->flags & FIELD_IS_POINTER) sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex; else if (strcmp(field->type, "pid_t") == 0) -- cgit From 65aea2338765da1a58cc26eeb84d72308492ecb5 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 23 May 2016 07:13:38 +0000 Subject: perf evlist: Add API to pause/resume perf_evlist__toggle_{pause,resume}() are introduced to pause/resume events in an evlist. Utilize PERF_EVENT_IOC_PAUSE_OUTPUT ioctl. Following commits use them to ensure overwrite ring buffer is paused before reading. Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1463987628-163563-2-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang [ Return -1, like all other ioctl() usage in evlist.c, rename 'pause' arg to avoid breaking the build on ubuntu 12.04 and other old systems ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 27 +++++++++++++++++++++++++++ tools/perf/util/evlist.h | 2 ++ 2 files changed, 29 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 1a370db02a8c..904523a2be90 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -679,6 +679,33 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, return NULL; } +static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) +{ + int i; + + for (i = 0; i < evlist->nr_mmaps; i++) { + int fd = evlist->mmap[i].fd; + int err; + + if (fd < 0) + continue; + err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0); + if (err) + return err; + } + return 0; +} + +int perf_evlist__pause(struct perf_evlist *evlist) +{ + return perf_evlist__set_paused(evlist, true); +} + +int perf_evlist__resume(struct perf_evlist *evlist) +{ + return perf_evlist__set_paused(evlist, false); +} + /* When check_messup is true, 'end' must points to a good entry */ static union perf_event * perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start, diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 0d165b1d8f77..97090b70976d 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -136,6 +136,8 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx); void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx); +int perf_evlist__pause(struct perf_evlist *evlist); +int perf_evlist__resume(struct perf_evlist *evlist); int perf_evlist__open(struct perf_evlist *evlist); void perf_evlist__close(struct perf_evlist *evlist); -- cgit From 2d11c65071d489e20b3a811167507939dd8c2eac Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 23 May 2016 07:13:39 +0000 Subject: perf record: Prevent reading invalid data in record__mmap_read When record__mmap_read() requires data more than the size of ring buffer, drop those data to avoid accessing invalid memory. This can happen when reading from overwritable ring buffer, which should be avoided. However, check this for robustness. Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1463987628-163563-3-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f3679c44d3f3..f302cc937ca5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -40,6 +40,7 @@ #include #include #include +#include struct record { @@ -98,6 +99,13 @@ static int record__mmap_read(struct record *rec, int idx) rec->samples++; size = head - old; + if (size > (unsigned long)(md->mask) + 1) { + WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); + + md->prev = head; + perf_evlist__mmap_consume(rec->evlist, idx); + return 0; + } if ((old & md->mask) + size != (head & md->mask)) { buf = &data[old & md->mask]; -- cgit From 09fa4f401296f555afb6f2f4282717644d94722e Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 23 May 2016 07:13:40 +0000 Subject: perf record: Rename variable to make code clear record__mmap_read() writes data from ring buffer into perf.data. 'head' is maintained by the kernel, points to the last written record. 'old' is maintained by perf, points to the record read in previous round. record__mmap_read() saves data from 'old' to 'head' to perf.data. The names of these variables are not very intutive. In addition, when dealing with backward writing ring buffer, the md->prev pointer should point to 'head' instead of the last byte it got. Add 'start' and 'end' pointer to make code clear and set md->prev to 'head' instead of the moved 'old' pointer. This patch doesn't change behavior since: buf = &data[old & md->mask]; size = head - old; old += size; <--- Here, old == head Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1463987628-163563-4-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f302cc937ca5..73ce651c84f6 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -88,17 +88,18 @@ static int record__mmap_read(struct record *rec, int idx) struct perf_mmap *md = &rec->evlist->mmap[idx]; u64 head = perf_mmap__read_head(md); u64 old = md->prev; + u64 end = head, start = old; unsigned char *data = md->base + page_size; unsigned long size; void *buf; int rc = 0; - if (old == head) + if (start == end) return 0; rec->samples++; - size = head - old; + size = end - start; if (size > (unsigned long)(md->mask) + 1) { WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); @@ -107,10 +108,10 @@ static int record__mmap_read(struct record *rec, int idx) return 0; } - if ((old & md->mask) + size != (head & md->mask)) { - buf = &data[old & md->mask]; - size = md->mask + 1 - (old & md->mask); - old += size; + if ((start & md->mask) + size != (end & md->mask)) { + buf = &data[start & md->mask]; + size = md->mask + 1 - (start & md->mask); + start += size; if (record__write(rec, buf, size) < 0) { rc = -1; @@ -118,16 +119,16 @@ static int record__mmap_read(struct record *rec, int idx) } } - buf = &data[old & md->mask]; - size = head - old; - old += size; + buf = &data[start & md->mask]; + size = end - start; + start += size; if (record__write(rec, buf, size) < 0) { rc = -1; goto out; } - md->prev = old; + md->prev = head; perf_evlist__mmap_consume(rec->evlist, idx); out: return rc; -- cgit From 3a62a7b8200a177ad96161e4f2678514e6ee301e Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 23 May 2016 07:13:41 +0000 Subject: perf record: Read from backward ring buffer Introduce rb_find_range() to find start and end position from a backward ring buffer. Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1463987628-163563-5-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/evlist.c | 1 + tools/perf/util/evlist.h | 1 + 3 files changed, 54 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 73ce651c84f6..dc3fcb597e4c 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -83,6 +83,54 @@ static int process_synthesized_event(struct perf_tool *tool, return record__write(rec, event, event->header.size); } +static int +backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) +{ + struct perf_event_header *pheader; + u64 evt_head = head; + int size = mask + 1; + + pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); + pheader = (struct perf_event_header *)(buf + (head & mask)); + *start = head; + while (true) { + if (evt_head - head >= (unsigned int)size) { + pr_debug("Finshed reading backward ring buffer: rewind\n"); + if (evt_head - head > (unsigned int)size) + evt_head -= pheader->size; + *end = evt_head; + return 0; + } + + pheader = (struct perf_event_header *)(buf + (evt_head & mask)); + + if (pheader->size == 0) { + pr_debug("Finshed reading backward ring buffer: get start\n"); + *end = evt_head; + return 0; + } + + evt_head += pheader->size; + pr_debug3("move evt_head: %"PRIx64"\n", evt_head); + } + WARN_ONCE(1, "Shouldn't get here\n"); + return -1; +} + +static int +rb_find_range(struct perf_evlist *evlist, + void *data, int mask, u64 head, u64 old, + u64 *start, u64 *end) +{ + if (!evlist->backward) { + *start = old; + *end = head; + return 0; + } + + return backward_rb_find_range(data, mask, head, start, end); +} + static int record__mmap_read(struct record *rec, int idx) { struct perf_mmap *md = &rec->evlist->mmap[idx]; @@ -94,6 +142,10 @@ static int record__mmap_read(struct record *rec, int idx) void *buf; int rc = 0; + if (rb_find_range(rec->evlist, data, md->mask, head, + old, &start, &end)) + return -1; + if (start == end) return 0; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 904523a2be90..e82ba90cc969 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -44,6 +44,7 @@ void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, perf_evlist__set_maps(evlist, cpus, threads); fdarray__init(&evlist->pollfd, 64); evlist->workload.pid = -1; + evlist->backward = false; } struct perf_evlist *perf_evlist__new(void) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 97090b70976d..d740fb877ab6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -44,6 +44,7 @@ struct perf_evlist { bool overwrite; bool enabled; bool has_user_cpus; + bool backward; size_t mmap_len; int id_pos; int is_pos; -- cgit From f9bc9e65fbec1781f64effc6bcc3b6edc57f8d19 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 18 May 2016 13:26:21 +0200 Subject: tools: Add kvm_stat vm monitor script This tool displays kvm vm exit statistics to ease vm monitoring. It takes its data from the kvm debugfs files or the vm tracepoints and outputs them as a curses ui or simple text. It was moved from qemu, as it is dependent on the kernel whereas qemu works with a large number of kernel versions, some of which may break the script. Signed-off-by: Janosch Frank Signed-off-by: Paolo Bonzini --- tools/Makefile | 6 +- tools/kvm/kvm_stat/Makefile | 5 + tools/kvm/kvm_stat/kvm_stat | 825 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 835 insertions(+), 1 deletion(-) create mode 100644 tools/kvm/kvm_stat/Makefile create mode 100755 tools/kvm/kvm_stat/kvm_stat (limited to 'tools') diff --git a/tools/Makefile b/tools/Makefile index 60c7e6c8ff17..061944681754 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -16,6 +16,7 @@ help: @echo ' gpio - GPIO tools' @echo ' hv - tools used when in Hyper-V clients' @echo ' iio - IIO tools' + @echo ' kvm_stat - top-like utility for displaying kvm statistics' @echo ' lguest - a minimal 32-bit x86 hypervisor' @echo ' net - misc networking tools' @echo ' perf - Linux performance measurement and analysis tool' @@ -110,10 +111,13 @@ tmon_install: freefall_install: $(call descend,laptop/$(@:_install=),install) +kvm_stat_install: + $(call descend,kvm/$(@:_install=),install) + install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install net_install x86_energy_perf_policy_install \ - tmon_install freefall_install objtool_install + tmon_install freefall_install objtool_install kvm_stat_install acpi_clean: $(call descend,power/acpi,clean) diff --git a/tools/kvm/kvm_stat/Makefile b/tools/kvm/kvm_stat/Makefile new file mode 100644 index 000000000000..c639b8d30688 --- /dev/null +++ b/tools/kvm/kvm_stat/Makefile @@ -0,0 +1,5 @@ +BINDIR=usr/bin + +install: + mkdir -p $(INSTALL_ROOT)/$(BINDIR) + install -m 755 -p "kvm_stat" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat new file mode 100755 index 000000000000..769d884b6d94 --- /dev/null +++ b/tools/kvm/kvm_stat/kvm_stat @@ -0,0 +1,825 @@ +#!/usr/bin/python +# +# top-like utility for displaying kvm statistics +# +# Copyright 2006-2008 Qumranet Technologies +# Copyright 2008-2011 Red Hat, Inc. +# +# Authors: +# Avi Kivity +# +# This work is licensed under the terms of the GNU GPL, version 2. See +# the COPYING file in the top-level directory. + +import curses +import sys +import os +import time +import optparse +import ctypes +import fcntl +import resource +import struct +import re +from collections import defaultdict +from time import sleep + +VMX_EXIT_REASONS = { + 'EXCEPTION_NMI': 0, + 'EXTERNAL_INTERRUPT': 1, + 'TRIPLE_FAULT': 2, + 'PENDING_INTERRUPT': 7, + 'NMI_WINDOW': 8, + 'TASK_SWITCH': 9, + 'CPUID': 10, + 'HLT': 12, + 'INVLPG': 14, + 'RDPMC': 15, + 'RDTSC': 16, + 'VMCALL': 18, + 'VMCLEAR': 19, + 'VMLAUNCH': 20, + 'VMPTRLD': 21, + 'VMPTRST': 22, + 'VMREAD': 23, + 'VMRESUME': 24, + 'VMWRITE': 25, + 'VMOFF': 26, + 'VMON': 27, + 'CR_ACCESS': 28, + 'DR_ACCESS': 29, + 'IO_INSTRUCTION': 30, + 'MSR_READ': 31, + 'MSR_WRITE': 32, + 'INVALID_STATE': 33, + 'MWAIT_INSTRUCTION': 36, + 'MONITOR_INSTRUCTION': 39, + 'PAUSE_INSTRUCTION': 40, + 'MCE_DURING_VMENTRY': 41, + 'TPR_BELOW_THRESHOLD': 43, + 'APIC_ACCESS': 44, + 'EPT_VIOLATION': 48, + 'EPT_MISCONFIG': 49, + 'WBINVD': 54, + 'XSETBV': 55, + 'APIC_WRITE': 56, + 'INVPCID': 58, +} + +SVM_EXIT_REASONS = { + 'READ_CR0': 0x000, + 'READ_CR3': 0x003, + 'READ_CR4': 0x004, + 'READ_CR8': 0x008, + 'WRITE_CR0': 0x010, + 'WRITE_CR3': 0x013, + 'WRITE_CR4': 0x014, + 'WRITE_CR8': 0x018, + 'READ_DR0': 0x020, + 'READ_DR1': 0x021, + 'READ_DR2': 0x022, + 'READ_DR3': 0x023, + 'READ_DR4': 0x024, + 'READ_DR5': 0x025, + 'READ_DR6': 0x026, + 'READ_DR7': 0x027, + 'WRITE_DR0': 0x030, + 'WRITE_DR1': 0x031, + 'WRITE_DR2': 0x032, + 'WRITE_DR3': 0x033, + 'WRITE_DR4': 0x034, + 'WRITE_DR5': 0x035, + 'WRITE_DR6': 0x036, + 'WRITE_DR7': 0x037, + 'EXCP_BASE': 0x040, + 'INTR': 0x060, + 'NMI': 0x061, + 'SMI': 0x062, + 'INIT': 0x063, + 'VINTR': 0x064, + 'CR0_SEL_WRITE': 0x065, + 'IDTR_READ': 0x066, + 'GDTR_READ': 0x067, + 'LDTR_READ': 0x068, + 'TR_READ': 0x069, + 'IDTR_WRITE': 0x06a, + 'GDTR_WRITE': 0x06b, + 'LDTR_WRITE': 0x06c, + 'TR_WRITE': 0x06d, + 'RDTSC': 0x06e, + 'RDPMC': 0x06f, + 'PUSHF': 0x070, + 'POPF': 0x071, + 'CPUID': 0x072, + 'RSM': 0x073, + 'IRET': 0x074, + 'SWINT': 0x075, + 'INVD': 0x076, + 'PAUSE': 0x077, + 'HLT': 0x078, + 'INVLPG': 0x079, + 'INVLPGA': 0x07a, + 'IOIO': 0x07b, + 'MSR': 0x07c, + 'TASK_SWITCH': 0x07d, + 'FERR_FREEZE': 0x07e, + 'SHUTDOWN': 0x07f, + 'VMRUN': 0x080, + 'VMMCALL': 0x081, + 'VMLOAD': 0x082, + 'VMSAVE': 0x083, + 'STGI': 0x084, + 'CLGI': 0x085, + 'SKINIT': 0x086, + 'RDTSCP': 0x087, + 'ICEBP': 0x088, + 'WBINVD': 0x089, + 'MONITOR': 0x08a, + 'MWAIT': 0x08b, + 'MWAIT_COND': 0x08c, + 'XSETBV': 0x08d, + 'NPF': 0x400, +} + +# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h) +AARCH64_EXIT_REASONS = { + 'UNKNOWN': 0x00, + 'WFI': 0x01, + 'CP15_32': 0x03, + 'CP15_64': 0x04, + 'CP14_MR': 0x05, + 'CP14_LS': 0x06, + 'FP_ASIMD': 0x07, + 'CP10_ID': 0x08, + 'CP14_64': 0x0C, + 'ILL_ISS': 0x0E, + 'SVC32': 0x11, + 'HVC32': 0x12, + 'SMC32': 0x13, + 'SVC64': 0x15, + 'HVC64': 0x16, + 'SMC64': 0x17, + 'SYS64': 0x18, + 'IABT': 0x20, + 'IABT_HYP': 0x21, + 'PC_ALIGN': 0x22, + 'DABT': 0x24, + 'DABT_HYP': 0x25, + 'SP_ALIGN': 0x26, + 'FP_EXC32': 0x28, + 'FP_EXC64': 0x2C, + 'SERROR': 0x2F, + 'BREAKPT': 0x30, + 'BREAKPT_HYP': 0x31, + 'SOFTSTP': 0x32, + 'SOFTSTP_HYP': 0x33, + 'WATCHPT': 0x34, + 'WATCHPT_HYP': 0x35, + 'BKPT32': 0x38, + 'VECTOR32': 0x3A, + 'BRK64': 0x3C, +} + +# From include/uapi/linux/kvm.h, KVM_EXIT_xxx +USERSPACE_EXIT_REASONS = { + 'UNKNOWN': 0, + 'EXCEPTION': 1, + 'IO': 2, + 'HYPERCALL': 3, + 'DEBUG': 4, + 'HLT': 5, + 'MMIO': 6, + 'IRQ_WINDOW_OPEN': 7, + 'SHUTDOWN': 8, + 'FAIL_ENTRY': 9, + 'INTR': 10, + 'SET_TPR': 11, + 'TPR_ACCESS': 12, + 'S390_SIEIC': 13, + 'S390_RESET': 14, + 'DCR': 15, + 'NMI': 16, + 'INTERNAL_ERROR': 17, + 'OSI': 18, + 'PAPR_HCALL': 19, + 'S390_UCONTROL': 20, + 'WATCHDOG': 21, + 'S390_TSCH': 22, + 'EPR': 23, + 'SYSTEM_EVENT': 24, +} + +IOCTL_NUMBERS = { + 'SET_FILTER': 0x40082406, + 'ENABLE': 0x00002400, + 'DISABLE': 0x00002401, + 'RESET': 0x00002403, +} + +class Arch(object): + """Class that encapsulates global architecture specific data like + syscall and ioctl numbers. + + """ + @staticmethod + def get_arch(): + machine = os.uname()[4] + + if machine.startswith('ppc'): + return ArchPPC() + elif machine.startswith('aarch64'): + return ArchA64() + elif machine.startswith('s390'): + return ArchS390() + else: + # X86_64 + for line in open('/proc/cpuinfo'): + if not line.startswith('flags'): + continue + + flags = line.split() + if 'vmx' in flags: + return ArchX86(VMX_EXIT_REASONS) + if 'svm' in flags: + return ArchX86(SVM_EXIT_REASONS) + return + +class ArchX86(Arch): + def __init__(self, exit_reasons): + self.sc_perf_evt_open = 298 + self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reasons = exit_reasons + +class ArchPPC(Arch): + def __init__(self): + self.sc_perf_evt_open = 319 + self.ioctl_numbers = IOCTL_NUMBERS + self.ioctl_numbers['ENABLE'] = 0x20002400 + self.ioctl_numbers['DISABLE'] = 0x20002401 + + # PPC comes in 32 and 64 bit and some generated ioctl + # numbers depend on the wordsize. + char_ptr_size = ctypes.sizeof(ctypes.c_char_p) + self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 + +class ArchA64(Arch): + def __init__(self): + self.sc_perf_evt_open = 241 + self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reasons = AARCH64_EXIT_REASONS + +class ArchS390(Arch): + def __init__(self): + self.sc_perf_evt_open = 331 + self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reasons = None + +ARCH = Arch.get_arch() + + +def walkdir(path): + """Returns os.walk() data for specified directory. + + As it is only a wrapper it returns the same 3-tuple of (dirpath, + dirnames, filenames). + """ + return next(os.walk(path)) + + +def parse_int_list(list_string): + """Returns an int list from a string of comma separated integers and + integer ranges.""" + integers = [] + members = list_string.split(',') + + for member in members: + if '-' not in member: + integers.append(int(member)) + else: + int_range = member.split('-') + integers.extend(range(int(int_range[0]), + int(int_range[1]) + 1)) + + return integers + + +def get_online_cpus(): + with open('/sys/devices/system/cpu/online') as cpu_list: + cpu_string = cpu_list.readline() + return parse_int_list(cpu_string) + + +def get_filters(): + filters = {} + filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) + if ARCH.exit_reasons: + filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) + return filters + +libc = ctypes.CDLL('libc.so.6', use_errno=True) +syscall = libc.syscall + +class perf_event_attr(ctypes.Structure): + _fields_ = [('type', ctypes.c_uint32), + ('size', ctypes.c_uint32), + ('config', ctypes.c_uint64), + ('sample_freq', ctypes.c_uint64), + ('sample_type', ctypes.c_uint64), + ('read_format', ctypes.c_uint64), + ('flags', ctypes.c_uint64), + ('wakeup_events', ctypes.c_uint32), + ('bp_type', ctypes.c_uint32), + ('bp_addr', ctypes.c_uint64), + ('bp_len', ctypes.c_uint64), + ] + + def __init__(self): + super(self.__class__, self).__init__() + self.type = PERF_TYPE_TRACEPOINT + self.size = ctypes.sizeof(self) + self.read_format = PERF_FORMAT_GROUP + +def perf_event_open(attr, pid, cpu, group_fd, flags): + return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr), + ctypes.c_int(pid), ctypes.c_int(cpu), + ctypes.c_int(group_fd), ctypes.c_long(flags)) + +PERF_TYPE_TRACEPOINT = 2 +PERF_FORMAT_GROUP = 1 << 3 + +PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' +PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' + +class Group(object): + def __init__(self): + self.events = [] + + def add_event(self, event): + self.events.append(event) + + def read(self): + length = 8 * (1 + len(self.events)) + read_format = 'xxxxxxxx' + 'Q' * len(self.events) + return dict(zip([event.name for event in self.events], + struct.unpack(read_format, + os.read(self.events[0].fd, length)))) + +class Event(object): + def __init__(self, name, group, trace_cpu, trace_point, trace_filter, + trace_set='kvm'): + self.name = name + self.fd = None + self.setup_event(group, trace_cpu, trace_point, trace_filter, + trace_set) + + def setup_event_attribute(self, trace_set, trace_point): + id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, + trace_point, 'id') + + event_attr = perf_event_attr() + event_attr.config = int(open(id_path).read()) + return event_attr + + def setup_event(self, group, trace_cpu, trace_point, trace_filter, + trace_set): + event_attr = self.setup_event_attribute(trace_set, trace_point) + + group_leader = -1 + if group.events: + group_leader = group.events[0].fd + + fd = perf_event_open(event_attr, -1, trace_cpu, + group_leader, 0) + if fd == -1: + err = ctypes.get_errno() + raise OSError(err, os.strerror(err), + 'while calling sys_perf_event_open().') + + if trace_filter: + fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'], + trace_filter) + + self.fd = fd + + def enable(self): + fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0) + + def disable(self): + fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0) + + def reset(self): + fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) + +class TracepointProvider(object): + def __init__(self): + self.group_leaders = [] + self.filters = get_filters() + self._fields = self.get_available_fields() + self.setup_traces() + self.fields = self._fields + + def get_available_fields(self): + path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm') + fields = walkdir(path)[1] + extra = [] + for field in fields: + if field in self.filters: + filter_name_, filter_dicts = self.filters[field] + for name in filter_dicts: + extra.append(field + '(' + name + ')') + fields += extra + return fields + + def setup_traces(self): + cpus = get_online_cpus() + + # The constant is needed as a buffer for python libs, std + # streams and other files that the script opens. + newlim = len(cpus) * len(self._fields) + 50 + try: + softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE) + + if hardlim < newlim: + # Now we need CAP_SYS_RESOURCE, to increase the hard limit. + resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim)) + else: + # Raising the soft limit is sufficient. + resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim)) + + except ValueError: + sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim)) + + for cpu in cpus: + group = Group() + for name in self._fields: + tracepoint = name + tracefilter = None + match = re.match(r'(.*)\((.*)\)', name) + if match: + tracepoint, sub = match.groups() + tracefilter = ('%s==%d\0' % + (self.filters[tracepoint][0], + self.filters[tracepoint][1][sub])) + + group.add_event(Event(name=name, + group=group, + trace_cpu=cpu, + trace_point=tracepoint, + trace_filter=tracefilter)) + self.group_leaders.append(group) + + def available_fields(self): + return self.get_available_fields() + + @property + def fields(self): + return self._fields + + @fields.setter + def fields(self, fields): + self._fields = fields + for group in self.group_leaders: + for index, event in enumerate(group.events): + if event.name in fields: + event.reset() + event.enable() + else: + # Do not disable the group leader. + # It would disable all of its events. + if index != 0: + event.disable() + + def read(self): + ret = defaultdict(int) + for group in self.group_leaders: + for name, val in group.read().iteritems(): + if name in self._fields: + ret[name] += val + return ret + +class DebugfsProvider(object): + def __init__(self): + self._fields = self.get_available_fields() + + def get_available_fields(self): + return walkdir(PATH_DEBUGFS_KVM)[2] + + @property + def fields(self): + return self._fields + + @fields.setter + def fields(self, fields): + self._fields = fields + + def read(self): + def val(key): + return int(file(PATH_DEBUGFS_KVM + '/' + key).read()) + return dict([(key, val(key)) for key in self._fields]) + +class Stats(object): + def __init__(self, providers, fields=None): + self.providers = providers + self._fields_filter = fields + self.values = {} + self.update_provider_filters() + + def update_provider_filters(self): + def wanted(key): + if not self._fields_filter: + return True + return re.match(self._fields_filter, key) is not None + + # As we reset the counters when updating the fields we can + # also clear the cache of old values. + self.values = {} + for provider in self.providers: + provider_fields = [key for key in provider.get_available_fields() + if wanted(key)] + provider.fields = provider_fields + + @property + def fields_filter(self): + return self._fields_filter + + @fields_filter.setter + def fields_filter(self, fields_filter): + self._fields_filter = fields_filter + self.update_provider_filters() + + def get(self): + for provider in self.providers: + new = provider.read() + for key in provider.fields: + oldval = self.values.get(key, (0, 0)) + newval = new.get(key, 0) + newdelta = None + if oldval is not None: + newdelta = newval - oldval[0] + self.values[key] = (newval, newdelta) + return self.values + +LABEL_WIDTH = 40 +NUMBER_WIDTH = 10 + +class Tui(object): + def __init__(self, stats): + self.stats = stats + self.screen = None + self.drilldown = False + self.update_drilldown() + + def __enter__(self): + """Initialises curses for later use. Based on curses.wrapper + implementation from the Python standard library.""" + self.screen = curses.initscr() + curses.noecho() + curses.cbreak() + + # The try/catch works around a minor bit of + # over-conscientiousness in the curses module, the error + # return from C start_color() is ignorable. + try: + curses.start_color() + except: + pass + + curses.use_default_colors() + return self + + def __exit__(self, *exception): + """Resets the terminal to its normal state. Based on curses.wrappre + implementation from the Python standard library.""" + if self.screen: + self.screen.keypad(0) + curses.echo() + curses.nocbreak() + curses.endwin() + + def update_drilldown(self): + if not self.stats.fields_filter: + self.stats.fields_filter = r'^[^\(]*$' + + elif self.stats.fields_filter == r'^[^\(]*$': + self.stats.fields_filter = None + + def refresh(self, sleeptime): + self.screen.erase() + self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) + self.screen.addstr(2, 1, 'Event') + self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH - + len('Total'), 'Total') + self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 - + len('Current'), 'Current') + row = 3 + stats = self.stats.get() + def sortkey(x): + if stats[x][1]: + return (-stats[x][1], -stats[x][0]) + else: + return (0, -stats[x][0]) + for key in sorted(stats.keys(), key=sortkey): + + if row >= self.screen.getmaxyx()[0]: + break + values = stats[key] + if not values[0] and not values[1]: + break + col = 1 + self.screen.addstr(row, col, key) + col += LABEL_WIDTH + self.screen.addstr(row, col, '%10d' % (values[0],)) + col += NUMBER_WIDTH + if values[1] is not None: + self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) + row += 1 + self.screen.refresh() + + def show_filter_selection(self): + while True: + self.screen.erase() + self.screen.addstr(0, 0, + "Show statistics for events matching a regex.", + curses.A_BOLD) + self.screen.addstr(2, 0, + "Current regex: {0}" + .format(self.stats.fields_filter)) + self.screen.addstr(3, 0, "New regex: ") + curses.echo() + regex = self.screen.getstr() + curses.noecho() + if len(regex) == 0: + return + try: + re.compile(regex) + self.stats.fields_filter = regex + return + except re.error: + continue + + def show_stats(self): + sleeptime = 0.25 + while True: + self.refresh(sleeptime) + curses.halfdelay(int(sleeptime * 10)) + sleeptime = 3 + try: + char = self.screen.getkey() + if char == 'x': + self.drilldown = not self.drilldown + self.update_drilldown() + if char == 'q': + break + if char == 'f': + self.show_filter_selection() + except KeyboardInterrupt: + break + except curses.error: + continue + +def batch(stats): + s = stats.get() + time.sleep(1) + s = stats.get() + for key in sorted(s.keys()): + values = s[key] + print '%-42s%10d%10d' % (key, values[0], values[1]) + +def log(stats): + keys = sorted(stats.get().iterkeys()) + def banner(): + for k in keys: + print '%s' % k, + print + def statline(): + s = stats.get() + for k in keys: + print ' %9d' % s[k][1], + print + line = 0 + banner_repeat = 20 + while True: + time.sleep(1) + if line % banner_repeat == 0: + banner() + statline() + line += 1 + +def get_options(): + description_text = """ +This script displays various statistics about VMs running under KVM. +The statistics are gathered from the KVM debugfs entries and / or the +currently available perf traces. + +The monitoring takes additional cpu cycles and might affect the VM's +performance. + +Requirements: +- Access to: + /sys/kernel/debug/kvm + /sys/kernel/debug/trace/events/* + /proc/pid/task +- /proc/sys/kernel/perf_event_paranoid < 1 if user has no + CAP_SYS_ADMIN and perf events are used. +- CAP_SYS_RESOURCE if the hard limit is not high enough to allow + the large number of files that are possibly opened. +""" + + class PlainHelpFormatter(optparse.IndentedHelpFormatter): + def format_description(self, description): + if description: + return description + "\n" + else: + return "" + + optparser = optparse.OptionParser(description=description_text, + formatter=PlainHelpFormatter()) + optparser.add_option('-1', '--once', '--batch', + action='store_true', + default=False, + dest='once', + help='run in batch mode for one second', + ) + optparser.add_option('-l', '--log', + action='store_true', + default=False, + dest='log', + help='run in logging mode (like vmstat)', + ) + optparser.add_option('-t', '--tracepoints', + action='store_true', + default=False, + dest='tracepoints', + help='retrieve statistics from tracepoints', + ) + optparser.add_option('-d', '--debugfs', + action='store_true', + default=False, + dest='debugfs', + help='retrieve statistics from debugfs', + ) + optparser.add_option('-f', '--fields', + action='store', + default=None, + dest='fields', + help='fields to display (regex)', + ) + (options, _) = optparser.parse_args(sys.argv) + return options + +def get_providers(options): + providers = [] + + if options.tracepoints: + providers.append(TracepointProvider()) + if options.debugfs: + providers.append(DebugfsProvider()) + if len(providers) == 0: + providers.append(TracepointProvider()) + + return providers + +def check_access(options): + if not os.path.exists('/sys/kernel/debug'): + sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.') + sys.exit(1) + + if not os.path.exists(PATH_DEBUGFS_KVM): + sys.stderr.write("Please make sure, that debugfs is mounted and " + "readable by the current user:\n" + "('mount -t debugfs debugfs /sys/kernel/debug')\n" + "Also ensure, that the kvm modules are loaded.\n") + sys.exit(1) + + if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints + or not options.debugfs): + sys.stderr.write("Please enable CONFIG_TRACING in your kernel " + "when using the option -t (default).\n" + "If it is enabled, make {0} readable by the " + "current user.\n" + .format(PATH_DEBUGFS_TRACING)) + if options.tracepoints: + sys.exit(1) + + sys.stderr.write("Falling back to debugfs statistics!\n") + options.debugfs = True + sleep(5) + + return options + +def main(): + options = get_options() + options = check_access(options) + providers = get_providers(options) + stats = Stats(providers, fields=options.fields) + + if options.log: + log(stats) + elif not options.once: + with Tui(stats) as tui: + tui.show_stats() + else: + batch(stats) + +if __name__ == "__main__": + main() -- cgit From fd41b5a96948f67785458c04e9ddb5ef619251d5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 24 May 2016 10:41:15 +0200 Subject: tools: Add kvm_stat man page Converted from the Texinfo source in QEMU to asciidoc. The a2x incantation was provided by Janosch Frank. Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/Makefile | 40 +++++++++++++++++++++++++-- tools/kvm/kvm_stat/kvm_stat.txt | 61 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 tools/kvm/kvm_stat/kvm_stat.txt (limited to 'tools') diff --git a/tools/kvm/kvm_stat/Makefile b/tools/kvm/kvm_stat/Makefile index c639b8d30688..5b1cba57e3b3 100644 --- a/tools/kvm/kvm_stat/Makefile +++ b/tools/kvm/kvm_stat/Makefile @@ -1,5 +1,41 @@ +include ../../scripts/Makefile.include +include ../../scripts/utilities.mak BINDIR=usr/bin +MANDIR=usr/share/man +MAN1DIR=$(MANDIR)/man1 -install: - mkdir -p $(INSTALL_ROOT)/$(BINDIR) +MAN1=kvm_stat.1 + +A2X=a2x +a2x_path := $(call get-executable,$(A2X)) + +all: man + +ifneq ($(findstring $(MAKEFLAGS),s),s) + ifneq ($(V),1) + QUIET_A2X = @echo ' A2X '$@; + endif +endif + +%.1: %.txt +ifeq ($(a2x_path),) + $(error "You need to install asciidoc for man pages") +else + $(QUIET_A2X)$(A2X) --doctype manpage --format manpage $< +endif + +clean: + rm -f $(MAN1) + +man: $(MAN1) + +install-man: man + install -d -m 755 $(INSTALL_ROOT)/$(MAN1DIR) + install -m 644 kvm_stat.1 $(INSTALL_ROOT)/$(MAN1DIR) + +install-tools: + install -d -m 755 $(INSTALL_ROOT)/$(BINDIR) install -m 755 -p "kvm_stat" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" + +install: install-tools install-man +.PHONY: all clean man install-tools install-man install diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt new file mode 100644 index 000000000000..8dcb48aaca50 --- /dev/null +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -0,0 +1,61 @@ +kvm_stat(1) +=========== + +NAME +---- +kvm_stat - Report KVM kernel module event counters + +SYNOPSIS +-------- +[verse] +'kvm_stat' [OPTION]... + +DESCRIPTION +----------- +kvm_stat prints counts of KVM kernel module trace events. These events signify +state transitions such as guest mode entry and exit. + +This tool is useful for observing guest behavior from the host perspective. +Often conclusions about performance or buggy behavior can be drawn from the +output. + +The set of KVM kernel module trace events may be specific to the kernel version +or architecture. It is best to check the KVM kernel module source code for the +meaning of events. + +Note that trace events are counted globally across all running guests. + +OPTIONS +------- +-1:: +--once:: +--batch:: + run in batch mode for one second + +-l:: +--log:: + run in logging mode (like vmstat) + +-t:: +--tracepoints:: + retrieve statistics from tracepoints + +-d:: +--debugfs:: + retrieve statistics from debugfs + +-f:: +--fields=:: + fields to display (regex) + +-h:: +--help:: + show help message + +SEE ALSO +-------- +'perf'(1), 'trace-cmd'(1) + +AUTHOR +------ +Stefan Hajnoczi -- cgit From c7d4fb5a6ebea2d3e8b3a1c251cc30bc56319897 Mon Sep 17 00:00:00 2001 From: Hemant Kumar Date: Tue, 19 Apr 2016 08:54:54 +0530 Subject: tools: kvm_stat: Powerpc related fixes kvm_stat script is failing to execute on powerpc : # ./kvm_stat Traceback (most recent call last): File "./kvm_stat", line 825, in main() File "./kvm_stat", line 813, in main providers = get_providers(options) File "./kvm_stat", line 778, in get_providers providers.append(TracepointProvider()) File "./kvm_stat", line 416, in __init__ self.filters = get_filters() File "./kvm_stat", line 315, in get_filters if ARCH.exit_reasons: AttributeError: 'ArchPPC' object has no attribute 'exit_reasons' This is because, its trying to access a non-defined attribute. Also, the IOCTL number of RESET is incorrect for powerpc. The correct number has been added. Signed-off-by: Hemant Kumar Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 769d884b6d94..27d217a4c4c1 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -256,11 +256,13 @@ class ArchPPC(Arch): self.ioctl_numbers = IOCTL_NUMBERS self.ioctl_numbers['ENABLE'] = 0x20002400 self.ioctl_numbers['DISABLE'] = 0x20002401 + self.ioctl_numbers['RESET'] = 0x20002403 # PPC comes in 32 and 64 bit and some generated ioctl # numbers depend on the wordsize. char_ptr_size = ctypes.sizeof(ctypes.c_char_p) self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 + self.exit_reasons = {} class ArchA64(Arch): def __init__(self): -- cgit From f0cf040f842242d55744c2606e8b7177507fbbb0 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 18 May 2016 13:26:24 +0200 Subject: tools: kvm_stat: Introduce pid monitoring Having stats for single VMs can help to determine the problem of a VM without the need of running other tools like perf. The tracepoints already allowed pid level monitoring, but kvm_stat didn't have support for it till now. Support for the newly implemented debugfs vm monitoring was also implemented. Signed-off-by: Janosch Frank Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 183 +++++++++++++++++++++++++++++++++++----- tools/kvm/kvm_stat/kvm_stat.txt | 6 +- 2 files changed, 167 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index 27d217a4c4c1..b4d50e8eb75a 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -367,12 +367,16 @@ class Group(object): os.read(self.events[0].fd, length)))) class Event(object): - def __init__(self, name, group, trace_cpu, trace_point, trace_filter, - trace_set='kvm'): + def __init__(self, name, group, trace_cpu, trace_pid, trace_point, + trace_filter, trace_set='kvm'): self.name = name self.fd = None - self.setup_event(group, trace_cpu, trace_point, trace_filter, - trace_set) + self.setup_event(group, trace_cpu, trace_pid, trace_point, + trace_filter, trace_set) + + def __del__(self): + if self.fd: + os.close(self.fd) def setup_event_attribute(self, trace_set, trace_point): id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, @@ -382,16 +386,16 @@ class Event(object): event_attr.config = int(open(id_path).read()) return event_attr - def setup_event(self, group, trace_cpu, trace_point, trace_filter, - trace_set): + def setup_event(self, group, trace_cpu, trace_pid, trace_point, + trace_filter, trace_set): event_attr = self.setup_event_attribute(trace_set, trace_point) group_leader = -1 if group.events: group_leader = group.events[0].fd - fd = perf_event_open(event_attr, -1, trace_cpu, - group_leader, 0) + fd = perf_event_open(event_attr, trace_pid, + trace_cpu, group_leader, 0) if fd == -1: err = ctypes.get_errno() raise OSError(err, os.strerror(err), @@ -417,8 +421,7 @@ class TracepointProvider(object): self.group_leaders = [] self.filters = get_filters() self._fields = self.get_available_fields() - self.setup_traces() - self.fields = self._fields + self._pid = 0 def get_available_fields(self): path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm') @@ -433,11 +436,17 @@ class TracepointProvider(object): return fields def setup_traces(self): - cpus = get_online_cpus() + if self._pid > 0: + # Fetch list of all threads of the monitored pid, as qemu + # starts a thread for each vcpu. + path = os.path.join('/proc', str(self._pid), 'task') + groupids = walkdir(path)[1] + else: + groupids = get_online_cpus() # The constant is needed as a buffer for python libs, std # streams and other files that the script opens. - newlim = len(cpus) * len(self._fields) + 50 + newlim = len(groupids) * len(self._fields) + 50 try: softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE) @@ -451,7 +460,7 @@ class TracepointProvider(object): except ValueError: sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim)) - for cpu in cpus: + for groupid in groupids: group = Group() for name in self._fields: tracepoint = name @@ -463,11 +472,22 @@ class TracepointProvider(object): (self.filters[tracepoint][0], self.filters[tracepoint][1][sub])) + # From perf_event_open(2): + # pid > 0 and cpu == -1 + # This measures the specified process/thread on any CPU. + # + # pid == -1 and cpu >= 0 + # This measures all processes/threads on the specified CPU. + trace_cpu = groupid if self._pid == 0 else -1 + trace_pid = int(groupid) if self._pid != 0 else -1 + group.add_event(Event(name=name, group=group, - trace_cpu=cpu, + trace_cpu=trace_cpu, + trace_pid=trace_pid, trace_point=tracepoint, trace_filter=tracefilter)) + self.group_leaders.append(group) def available_fields(self): @@ -491,6 +511,17 @@ class TracepointProvider(object): if index != 0: event.disable() + @property + def pid(self): + return self._pid + + @pid.setter + def pid(self, pid): + self._pid = pid + self.group_leaders = [] + self.setup_traces() + self.fields = self._fields + def read(self): ret = defaultdict(int) for group in self.group_leaders: @@ -502,6 +533,8 @@ class TracepointProvider(object): class DebugfsProvider(object): def __init__(self): self._fields = self.get_available_fields() + self._pid = 0 + self.do_read = True def get_available_fields(self): return walkdir(PATH_DEBUGFS_KVM)[2] @@ -514,16 +547,57 @@ class DebugfsProvider(object): def fields(self, fields): self._fields = fields + @property + def pid(self): + return self._pid + + @pid.setter + def pid(self, pid): + if pid != 0: + self._pid = pid + + vms = walkdir(PATH_DEBUGFS_KVM)[1] + if len(vms) == 0: + self.do_read = False + + self.paths = filter(lambda x: "{}-".format(pid) in x, vms) + + else: + self.paths = [''] + self.do_read = True + def read(self): - def val(key): - return int(file(PATH_DEBUGFS_KVM + '/' + key).read()) - return dict([(key, val(key)) for key in self._fields]) + """Returns a dict with format:'file name / field -> current value'.""" + results = {} + + # If no debugfs filtering support is available, then don't read. + if not self.do_read: + return results + + for path in self.paths: + for field in self._fields: + results[field] = results.get(field, 0) \ + + self.read_field(field, path) + + return results + + def read_field(self, field, path): + """Returns the value of a single field from a specific VM.""" + try: + return int(open(os.path.join(PATH_DEBUGFS_KVM, + path, + field)) + .read()) + except IOError: + return 0 class Stats(object): - def __init__(self, providers, fields=None): + def __init__(self, providers, pid, fields=None): self.providers = providers + self._pid_filter = pid self._fields_filter = fields self.values = {} + self.update_provider_pid() self.update_provider_filters() def update_provider_filters(self): @@ -540,6 +614,10 @@ class Stats(object): if wanted(key)] provider.fields = provider_fields + def update_provider_pid(self): + for provider in self.providers: + provider.pid = self._pid_filter + @property def fields_filter(self): return self._fields_filter @@ -549,6 +627,16 @@ class Stats(object): self._fields_filter = fields_filter self.update_provider_filters() + @property + def pid_filter(self): + return self._pid_filter + + @pid_filter.setter + def pid_filter(self, pid): + self._pid_filter = pid + self.values = {} + self.update_provider_pid() + def get(self): for provider in self.providers: new = provider.read() @@ -605,9 +693,17 @@ class Tui(object): elif self.stats.fields_filter == r'^[^\(]*$': self.stats.fields_filter = None + def update_pid(self, pid): + self.stats.pid_filter = pid + def refresh(self, sleeptime): self.screen.erase() - self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) + if self.stats.pid_filter > 0: + self.screen.addstr(0, 0, 'kvm statistics - pid {0}' + .format(self.stats.pid_filter), + curses.A_BOLD) + else: + self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) self.screen.addstr(2, 1, 'Event') self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH - len('Total'), 'Total') @@ -659,6 +755,37 @@ class Tui(object): except re.error: continue + def show_vm_selection(self): + while True: + self.screen.erase() + self.screen.addstr(0, 0, + 'Show statistics for specific pid.', + curses.A_BOLD) + self.screen.addstr(1, 0, + 'This might limit the shown data to the trace ' + 'statistics.') + + curses.echo() + self.screen.addstr(3, 0, "Pid [0 or pid]: ") + pid = self.screen.getstr() + curses.noecho() + + try: + pid = int(pid) + + if pid == 0: + self.update_pid(pid) + break + else: + if not os.path.isdir(os.path.join('/proc/', str(pid))): + continue + else: + self.update_pid(pid) + break + + except ValueError: + continue + def show_stats(self): sleeptime = 0.25 while True: @@ -674,6 +801,8 @@ class Tui(object): break if char == 'f': self.show_filter_selection() + if char == 'p': + self.show_vm_selection() except KeyboardInterrupt: break except curses.error: @@ -766,6 +895,13 @@ Requirements: dest='fields', help='fields to display (regex)', ) + optparser.add_option('-p', '--pid', + action='store', + default=0, + type=int, + dest='pid', + help='restrict statistics to pid', + ) (options, _) = optparser.parse_args(sys.argv) return options @@ -812,8 +948,15 @@ def check_access(options): def main(): options = get_options() options = check_access(options) + + if (options.pid > 0 and + not os.path.isdir(os.path.join('/proc/', + str(options.pid)))): + sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n') + sys.exit('Specified pid does not exist.') + providers = get_providers(options) - stats = Stats(providers, fields=options.fields) + stats = Stats(providers, options.pid, fields=options.fields) if options.log: log(stats) diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index 8dcb48aaca50..b92a153d7115 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -23,8 +23,6 @@ The set of KVM kernel module trace events may be specific to the kernel version or architecture. It is best to check the KVM kernel module source code for the meaning of events. -Note that trace events are counted globally across all running guests. - OPTIONS ------- -1:: @@ -44,6 +42,10 @@ OPTIONS --debugfs:: retrieve statistics from debugfs +-p:: +--pid=:: + limit statistics to one virtual machine (pid) + -f:: --fields=:: fields to display (regex) -- cgit From fabc712866435660f7fa1070e1fabe29eba5bc4c Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 18 May 2016 13:26:25 +0200 Subject: tools: kvm_stat: Add comments A lot of the code works with the perf events about which only sparse documentation was available until 2012. Having that information now, we can clarify what is done in the code. Signed-off-by: Janosch Frank Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat | 161 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 159 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index b4d50e8eb75a..581278c58488 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -10,6 +10,15 @@ # # This work is licensed under the terms of the GNU GPL, version 2. See # the COPYING file in the top-level directory. +"""The kvm_stat module outputs statistics about running KVM VMs + +Three different ways of output formatting are available: +- as a top-like text ui +- in a key -> value format +- in an all keys, all values format + +The data is sampled from the KVM's debugfs entries and its perf events. +""" import curses import sys @@ -217,8 +226,10 @@ IOCTL_NUMBERS = { } class Arch(object): - """Class that encapsulates global architecture specific data like - syscall and ioctl numbers. + """Encapsulates global architecture specific data. + + Contains the performance event open syscall and ioctl numbers, as + well as the VM exit reasons for the architecture it runs on. """ @staticmethod @@ -306,12 +317,22 @@ def parse_int_list(list_string): def get_online_cpus(): + """Returns a list of cpu id integers.""" with open('/sys/devices/system/cpu/online') as cpu_list: cpu_string = cpu_list.readline() return parse_int_list(cpu_string) def get_filters(): + """Returns a dict of trace events, their filter ids and + the values that can be filtered. + + Trace events can be filtered for special values by setting a + filter string via an ioctl. The string normally has the format + identifier==value. For each filter a new event will be created, to + be able to distinguish the events. + + """ filters = {} filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) if ARCH.exit_reasons: @@ -322,6 +343,14 @@ libc = ctypes.CDLL('libc.so.6', use_errno=True) syscall = libc.syscall class perf_event_attr(ctypes.Structure): + """Struct that holds the necessary data to set up a trace event. + + For an extensive explanation see perf_event_open(2) and + include/uapi/linux/perf_event.h, struct perf_event_attr + + All fields that are not initialized in the constructor are 0. + + """ _fields_ = [('type', ctypes.c_uint32), ('size', ctypes.c_uint32), ('config', ctypes.c_uint64), @@ -342,6 +371,20 @@ class perf_event_attr(ctypes.Structure): self.read_format = PERF_FORMAT_GROUP def perf_event_open(attr, pid, cpu, group_fd, flags): + """Wrapper for the sys_perf_evt_open() syscall. + + Used to set up performance events, returns a file descriptor or -1 + on error. + + Attributes are: + - syscall number + - struct perf_event_attr * + - pid or -1 to monitor all pids + - cpu number or -1 to monitor all cpus + - The file descriptor of the group leader or -1 to create a group. + - flags + + """ return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr), ctypes.c_int(pid), ctypes.c_int(cpu), ctypes.c_int(group_fd), ctypes.c_long(flags)) @@ -353,6 +396,8 @@ PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' class Group(object): + """Represents a perf event group.""" + def __init__(self): self.events = [] @@ -360,6 +405,22 @@ class Group(object): self.events.append(event) def read(self): + """Returns a dict with 'event name: value' for all events in the + group. + + Values are read by reading from the file descriptor of the + event that is the group leader. See perf_event_open(2) for + details. + + Read format for the used event configuration is: + struct read_format { + u64 nr; /* The number of events */ + struct { + u64 value; /* The value of the event */ + } values[nr]; + }; + + """ length = 8 * (1 + len(self.events)) read_format = 'xxxxxxxx' + 'Q' * len(self.events) return dict(zip([event.name for event in self.events], @@ -367,6 +428,7 @@ class Group(object): os.read(self.events[0].fd, length)))) class Event(object): + """Represents a performance event and manages its life cycle.""" def __init__(self, name, group, trace_cpu, trace_pid, trace_point, trace_filter, trace_set='kvm'): self.name = name @@ -375,10 +437,19 @@ class Event(object): trace_filter, trace_set) def __del__(self): + """Closes the event's file descriptor. + + As no python file object was created for the file descriptor, + python will not reference count the descriptor and will not + close it itself automatically, so we do it. + + """ if self.fd: os.close(self.fd) def setup_event_attribute(self, trace_set, trace_point): + """Returns an initialized ctype perf_event_attr struct.""" + id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, trace_point, 'id') @@ -388,9 +459,19 @@ class Event(object): def setup_event(self, group, trace_cpu, trace_pid, trace_point, trace_filter, trace_set): + """Sets up the perf event in Linux. + + Issues the syscall to register the event in the kernel and + then sets the optional filter. + + """ + event_attr = self.setup_event_attribute(trace_set, trace_point) + # First event will be group leader. group_leader = -1 + + # All others have to pass the leader's descriptor instead. if group.events: group_leader = group.events[0].fd @@ -408,15 +489,33 @@ class Event(object): self.fd = fd def enable(self): + """Enables the trace event in the kernel. + + Enabling the group leader makes reading counters from it and the + events under it possible. + + """ fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0) def disable(self): + """Disables the trace event in the kernel. + + Disabling the group leader makes reading all counters under it + impossible. + + """ fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0) def reset(self): + """Resets the count of the trace event in the kernel.""" fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) class TracepointProvider(object): + """Data provider for the stats class. + + Manages the events/groups from which it acquires its data. + + """ def __init__(self): self.group_leaders = [] self.filters = get_filters() @@ -424,6 +523,20 @@ class TracepointProvider(object): self._pid = 0 def get_available_fields(self): + """Returns a list of available event's of format 'event name(filter + name)'. + + All available events have directories under + /sys/kernel/debug/tracing/events/ which export information + about the specific event. Therefore, listing the dirs gives us + a list of all available events. + + Some events like the vm exit reasons can be filtered for + specific values. To take account for that, the routine below + creates special fields with the following format: + event name(filter name) + + """ path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm') fields = walkdir(path)[1] extra = [] @@ -436,6 +549,8 @@ class TracepointProvider(object): return fields def setup_traces(self): + """Creates all event and group objects needed to be able to retrieve + data.""" if self._pid > 0: # Fetch list of all threads of the monitored pid, as qemu # starts a thread for each vcpu. @@ -499,6 +614,7 @@ class TracepointProvider(object): @fields.setter def fields(self, fields): + """Enables/disables the (un)wanted events""" self._fields = fields for group in self.group_leaders: for index, event in enumerate(group.events): @@ -517,12 +633,16 @@ class TracepointProvider(object): @pid.setter def pid(self, pid): + """Changes the monitored pid by setting new traces.""" self._pid = pid + # The garbage collector will get rid of all Event/Group + # objects and open files after removing the references. self.group_leaders = [] self.setup_traces() self.fields = self._fields def read(self): + """Returns 'event name: current value' for all enabled events.""" ret = defaultdict(int) for group in self.group_leaders: for name, val in group.read().iteritems(): @@ -531,12 +651,19 @@ class TracepointProvider(object): return ret class DebugfsProvider(object): + """Provides data from the files that KVM creates in the kvm debugfs + folder.""" def __init__(self): self._fields = self.get_available_fields() self._pid = 0 self.do_read = True def get_available_fields(self): + """"Returns a list of available fields. + + The fields are all available KVM debugfs files + + """ return walkdir(PATH_DEBUGFS_KVM)[2] @property @@ -592,6 +719,12 @@ class DebugfsProvider(object): return 0 class Stats(object): + """Manages the data providers and the data they provide. + + It is used to set filters on the provider's data and collect all + provider data. + + """ def __init__(self, providers, pid, fields=None): self.providers = providers self._pid_filter = pid @@ -601,6 +734,7 @@ class Stats(object): self.update_provider_filters() def update_provider_filters(self): + """Propagates fields filters to providers.""" def wanted(key): if not self._fields_filter: return True @@ -615,6 +749,7 @@ class Stats(object): provider.fields = provider_fields def update_provider_pid(self): + """Propagates pid filters to providers.""" for provider in self.providers: provider.pid = self._pid_filter @@ -638,6 +773,8 @@ class Stats(object): self.update_provider_pid() def get(self): + """Returns a dict with field -> (value, delta to last value) of all + provider data.""" for provider in self.providers: new = provider.read() for key in provider.fields: @@ -653,6 +790,7 @@ LABEL_WIDTH = 40 NUMBER_WIDTH = 10 class Tui(object): + """Instruments curses to draw a nice text ui.""" def __init__(self, stats): self.stats = stats self.screen = None @@ -687,6 +825,7 @@ class Tui(object): curses.endwin() def update_drilldown(self): + """Sets or removes a filter that only allows fields without braces.""" if not self.stats.fields_filter: self.stats.fields_filter = r'^[^\(]*$' @@ -694,9 +833,11 @@ class Tui(object): self.stats.fields_filter = None def update_pid(self, pid): + """Propagates pid selection to stats object.""" self.stats.pid_filter = pid def refresh(self, sleeptime): + """Refreshes on-screen data.""" self.screen.erase() if self.stats.pid_filter > 0: self.screen.addstr(0, 0, 'kvm statistics - pid {0}' @@ -734,6 +875,11 @@ class Tui(object): self.screen.refresh() def show_filter_selection(self): + """Draws filter selection mask. + + Asks for a valid regex and sets the fields filter accordingly. + + """ while True: self.screen.erase() self.screen.addstr(0, 0, @@ -756,6 +902,11 @@ class Tui(object): continue def show_vm_selection(self): + """Draws PID selection mask. + + Asks for a pid until a valid pid or 0 has been entered. + + """ while True: self.screen.erase() self.screen.addstr(0, 0, @@ -787,6 +938,7 @@ class Tui(object): continue def show_stats(self): + """Refreshes the screen and processes user input.""" sleeptime = 0.25 while True: self.refresh(sleeptime) @@ -809,6 +961,7 @@ class Tui(object): continue def batch(stats): + """Prints statistics in a key, value format.""" s = stats.get() time.sleep(1) s = stats.get() @@ -817,6 +970,7 @@ def batch(stats): print '%-42s%10d%10d' % (key, values[0], values[1]) def log(stats): + """Prints statistics as reiterating key block, multiple value blocks.""" keys = sorted(stats.get().iterkeys()) def banner(): for k in keys: @@ -837,6 +991,7 @@ def log(stats): line += 1 def get_options(): + """Returns processed program arguments.""" description_text = """ This script displays various statistics about VMs running under KVM. The statistics are gathered from the KVM debugfs entries and / or the @@ -906,6 +1061,7 @@ Requirements: return options def get_providers(options): + """Returns a list of data providers depending on the passed options.""" providers = [] if options.tracepoints: @@ -918,6 +1074,7 @@ def get_providers(options): return providers def check_access(options): + """Exits if the current user can't access all needed directories.""" if not os.path.exists('/sys/kernel/debug'): sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.') sys.exit(1) -- cgit From 38272dc4f1b17437871b786d567e1242d0904f5a Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 24 May 2016 09:21:27 +0000 Subject: perf symbols: Check kptr_restrict for root If kptr_restrict is set to 2, even root is not allowed to see pointers. This patch checks kptr_restrict even if euid == 0. For root, report error if kptr_restrict is 2. Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1464081688-167940-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 20f9cb32b703..54c4ff2b1cee 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1933,17 +1933,17 @@ int setup_intlist(struct intlist **list, const char *list_str, static bool symbol__read_kptr_restrict(void) { bool value = false; + FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); - if (geteuid() != 0) { - FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); - if (fp != NULL) { - char line[8]; + if (fp != NULL) { + char line[8]; - if (fgets(line, sizeof(line), fp) != NULL) - value = atoi(line) != 0; + if (fgets(line, sizeof(line), fp) != NULL) + value = (geteuid() != 0) ? + (atoi(line) != 0) : + (atoi(line) == 2); - fclose(fp); - } + fclose(fp); } return value; -- cgit From 3dc6c1d54ff4cc9ce7e8513c286c970304cde20b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Tue, 24 May 2016 09:21:28 +0000 Subject: perf record: Fix crash when kptr is restricted Before this patch, a simple 'perf record' could fail if kptr_restrict is set to 1 (for normal user) or 2 (for root): # perf record ls WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted, check /proc/sys/kernel/kptr_restrict. Samples in kernel functions may not be resolved if a suitable vmlinux file is not found in the buildid cache or in the vmlinux path. Samples in kernel modules won't be resolved at all. If some relocation was applied (e.g. kexec) symbols may be misresolved even with a suitable vmlinux or kallsyms file. Segmentation fault (core dumped) This patch skips perf_event__synthesize_kernel_mmap() when kptr is not available. Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Fixes: 45e90056904b ("perf machine: Do not bail out if not managing to read ref reloc symbol") Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1464081688-167940-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f6fcc6832949..9b141f12329e 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -673,6 +673,8 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, int err; union perf_event *event; + if (symbol_conf.kptr_restrict) + return -1; if (map == NULL) return -1; -- cgit From 5ea5888b2fbf5b230da62b2a21c8247bebb6c9cf Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 27 May 2016 11:35:51 +0000 Subject: perf ctf: Convert invalid chars in a string before set value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We observed some crazy apps on Android set their comm to unprintable string. For example: # cat /proc/10607/task/*/comm tencent.qqmusic ... Binder_2 日志输出线 <-- Chinese word 'log output thread' WifiManager ... 'perf data convert' fails to convert perf.data with such string to CTF format. For example: # cat << EOF > ./badguy.c #include int main(int argc, char *argv[]) { prctl(PR_SET_NAME, "\xe6\x97\xa5\xe5\xbf\x97\xe8\xbe\x93\xe5\x87\xba\xe7\xba\xbf"); while(1) sleep(1); return 0; } EOF # gcc ./badguy.c # perf record -e sched:* ./a.out # perf data convert --to-ctf ./bad.ctf CTF stream 4 flush failed [ perf data convert: Converted 'perf.data' into CTF data './bad.ctf' ] [ perf data convert: Converted and wrote 0.008 MB (78 samples) ] # babeltrace ./bad.ctf/ [error] Packet size (18446744073709551615 bits) is larger than remaining file size (262144 bits). [error] Stream index creation error. [error] Open file stream error. [warning] [Context] Cannot open_trace of format ctf at path ./bad.ctf. [warning] [Context] cannot open trace "./bad.ctf" from ./bad.ctf/ for reading. [error] Cannot open any trace for reading. [error] opening trace "./bad.ctf/" for reading. [error] none of the specified trace paths could be opened. This patch converts unprintable characters to hexadecimal word. After applying this patch the above test works correctly: # ~/perf data convert --to-ctf ./good.ctf [ perf data convert: Converted 'perf.data' into CTF data './good.ctf' ] [ perf data convert: Converted and wrote 0.008 MB (78 samples) ] # babeltrace ./good.ctf .. [23:14:35.491665268] (+0.000001100) sched:sched_wakeup: { cpu_id = 4 }, { perf_ip = 0xFFFFFFFF810AEF33, perf_tid = 0, perf_pid = 0, perf_id = 5123, perf_period = 1, common_type = 270, common_flags = 45, common_preempt_count = 4, common_pid = 0, comm = "\xe6\x97\xa5\xe5\xbf\x97\xe8\xbe\x93\xe5\x87\xba\xe7\xba\xbf", pid = 1057, prio = 120, success = 1, target_cpu = 4 } [23:14:35.491666230] (+0.000000962) sched:sched_wakeup: { cpu_id = 4 }, { perf_ip = 0xFFFFFFFF810AEF33, perf_tid = 0, perf_pid = 0, perf_id = 5122, perf_period = 1, common_type = 270, common_flags = 45, common_preempt_count = 4, common_pid = 0, comm = "\xe6\x97\xa5\xe5\xbf\x97\xe8\xbe\x93\xe5\x87\xba\xe7\xba\xbf", pid = 1057, prio = 120, success = 1, target_cpu = 4 } .. Committer note: To build perf with libabeltrace, use: $ mkdir -p /tmp/build/perf $ make LIBBABELTRACE=1 LIBBABELTRACE_DIR=/usr/local O=/tmp/build/perf -C tools/perf install-bin Or equivalent (no O=, fixup LIBBABELTRACE_DIR, etc). Signed-off-by: Wang Nan Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1464348951-179595-1-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data-convert-bt.c | 41 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index bbf69d248ec5..9f53020c3269 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -204,6 +204,44 @@ static unsigned long long adjust_signedness(unsigned long long value_int, int si return (value_int & value_mask) | ~value_mask; } +static int string_set_value(struct bt_ctf_field *field, const char *string) +{ + char *buffer = NULL; + size_t len = strlen(string), i, p; + int err; + + for (i = p = 0; i < len; i++, p++) { + if (isprint(string[i])) { + if (!buffer) + continue; + buffer[p] = string[i]; + } else { + char numstr[5]; + + snprintf(numstr, sizeof(numstr), "\\x%02x", + (unsigned int)(string[i]) & 0xff); + + if (!buffer) { + buffer = zalloc(i + (len - i) * 4 + 2); + if (!buffer) { + pr_err("failed to set unprintable string '%s'\n", string); + return bt_ctf_field_string_set_value(field, "UNPRINTABLE-STRING"); + } + if (i > 0) + strncpy(buffer, string, i); + } + strncat(buffer + p, numstr, 4); + p += 3; + } + } + + if (!buffer) + return bt_ctf_field_string_set_value(field, string); + err = bt_ctf_field_string_set_value(field, buffer); + free(buffer); + return err; +} + static int add_tracepoint_field_value(struct ctf_writer *cw, struct bt_ctf_event_class *event_class, struct bt_ctf_event *event, @@ -270,8 +308,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, } if (flags & FIELD_IS_STRING) - ret = bt_ctf_field_string_set_value(field, - data + offset + i * len); + ret = string_set_value(field, data + offset + i * len); else { unsigned long long value_int; -- cgit From 3b220cf867ce6881c7f671b27564c4cc13d8c4b8 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 May 2016 09:12:55 +0300 Subject: tools/virtio/ringtest: add usage example to README Having typical usage example in the README file is more convinient than in the git history... Signed-off-by: Mike Rapoport Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/README | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README index 34e94c46104f..d83707a336c9 100644 --- a/tools/virtio/ringtest/README +++ b/tools/virtio/ringtest/README @@ -1,2 +1,6 @@ Partial implementation of various ring layouts, useful to tune virtio design. Uses shared memory heavily. + +Typical use: + +# sh run-on-all.sh perf stat -r 10 --log-fd 1 -- ./ring -- cgit From ef1b144d23dbd745fd655b7d0a70212fcc8d0121 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Wed, 4 May 2016 13:15:50 +0300 Subject: tools/virtio/ringtest: fix run-on-all.sh to work without /dev/cpu /dev/cpu is only available on x86 with certain modules (e.g. msr) enabled. Using lscpu to get processors count is more portable. Signed-off-by: Mike Rapoport Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/run-on-all.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh index 52b0f71ffa8d..2e69ca812b4c 100755 --- a/tools/virtio/ringtest/run-on-all.sh +++ b/tools/virtio/ringtest/run-on-all.sh @@ -3,10 +3,10 @@ #use last CPU for host. Why not the first? #many devices tend to use cpu0 by default so #it tends to be busier -HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1) +HOST_AFFINITY=$(lscpu -p=cpu | tail -1) #run command on all cpus -for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n); +for cpu in $(seq 0 $HOST_AFFINITY) do #Don't run guest and host on same CPU #It actually works ok if using signalling -- cgit From 139ab4d4e68b8cf2a611b06c006a2195dc6bedf1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 24 May 2016 15:21:05 +0300 Subject: tools/virtio: add noring tool Useful to measure testing framework overhead. Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/Makefile | 4 ++- tools/virtio/ringtest/noring.c | 69 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 tools/virtio/ringtest/noring.c (limited to 'tools') diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile index 6ba745529833..6173adae9f08 100644 --- a/tools/virtio/ringtest/Makefile +++ b/tools/virtio/ringtest/Makefile @@ -1,6 +1,6 @@ all: -all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder +all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder noring CFLAGS += -Wall CFLAGS += -pthread -O2 -ggdb @@ -15,11 +15,13 @@ ring: ring.o main.o virtio_ring_0_9: virtio_ring_0_9.o main.o virtio_ring_poll: virtio_ring_poll.o main.o virtio_ring_inorder: virtio_ring_inorder.o main.o +noring: noring.o main.o clean: -rm main.o -rm ring.o ring -rm virtio_ring_0_9.o virtio_ring_0_9 -rm virtio_ring_poll.o virtio_ring_poll -rm virtio_ring_inorder.o virtio_ring_inorder + -rm noring.o noring .PHONY: all clean diff --git a/tools/virtio/ringtest/noring.c b/tools/virtio/ringtest/noring.c new file mode 100644 index 000000000000..eda2f4824130 --- /dev/null +++ b/tools/virtio/ringtest/noring.c @@ -0,0 +1,69 @@ +#define _GNU_SOURCE +#include "main.h" +#include + +/* stub implementation: useful for measuring overhead */ +void alloc_ring(void) +{ +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + return 0; +} + +/* + * skb_array API provides no way for producer to find out whether a given + * buffer was consumed. Our tests merely require that a successful get_buf + * implies that add_inbuf succeed in the past, and that add_inbuf will succeed, + * fake it accordingly. + */ +void *get_buf(unsigned *lenp, void **bufp) +{ + return "Buffer"; +} + +void poll_used(void) +{ +} + +void disable_call() +{ + assert(0); +} + +bool enable_call() +{ + assert(0); +} + +void kick_available(void) +{ + assert(0); +} + +/* host side */ +void disable_kick() +{ + assert(0); +} + +bool enable_kick() +{ + assert(0); +} + +void poll_avail(void) +{ +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + return true; +} + +void call_used(void) +{ + assert(0); +} -- cgit From fc100a7f89da85da8edd9c2e6f6e8b2490d74ae1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 3 Jun 2016 19:19:20 +0200 Subject: soreuseport: Fix reuseport_bpf testcase on 32bit architectures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the following compiler warnings when compiling the reuseport_bpf testcase on a 32 bit platform: reuseport_bpf.c: In function ‘attach_ebpf’: reuseport_bpf.c:114:15: warning: cast from pointer to integer of ifferent size [-Wpointer-to-int-cast] Signed-off-by: Helge Deller Signed-off-by: David S. Miller --- tools/testing/selftests/net/reuseport_bpf.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index 96ba386b1b7b..4a8217448f20 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -111,9 +111,9 @@ static void attach_ebpf(int fd, uint16_t mod) memset(&attr, 0, sizeof(attr)); attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; attr.insn_cnt = ARRAY_SIZE(prog); - attr.insns = (uint64_t)prog; - attr.license = (uint64_t)bpf_license; - attr.log_buf = (uint64_t)bpf_log_buf; + attr.insns = (unsigned long) &prog; + attr.license = (unsigned long) &bpf_license; + attr.log_buf = (unsigned long) &bpf_log_buf; attr.log_size = sizeof(bpf_log_buf); attr.log_level = 1; attr.kern_version = 0; @@ -351,8 +351,8 @@ static void test_filter_no_reuseport(const struct test_params p) memset(&eprog, 0, sizeof(eprog)); eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; eprog.insn_cnt = ARRAY_SIZE(ecode); - eprog.insns = (uint64_t)ecode; - eprog.license = (uint64_t)bpf_license; + eprog.insns = (unsigned long) &ecode; + eprog.license = (unsigned long) &bpf_license; eprog.kern_version = 0; memset(&cprog, 0, sizeof(cprog)); -- cgit From 0ded5174e976e2b2a354fe38abf1ebf4492c6dc3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 23 May 2016 15:06:30 -0400 Subject: ftracetest: Fix hist unsupported result in hist selftests When histograms are not configured in the kernel, the ftracetest histogram selftests should return "unsupported" and not "Failed". To detect this, the test scripts have: FEATURE=`grep hist events/sched/sched_process_fork/trigger` if [ -z "$FEATURE" ]; then echo "hist trigger is not supported" exit_unsupported fi The problem is that '-e' is in effect and any error will cause the program to terminate. The grep for 'hist' fails, because it is not compiled it (thus unsupported), but because grep has an error code for failing to find the string, it causes the program to terminate, and is marked as a failed test. Namhyung Kim recommended to test for the "hist" file located in events/sched/sched_process_fork/hist instead, as it is more inline with the other checks. As the hist file is only created if the histogram feature is enabled, that is a valid check. Link: http://lkml.kernel.org/r/20160523151538.4ea9ce0c@gandalf.local.home Suggested-by: Namhyung Kim Acked-by: Namhyung Kim Acked-by: Masami Hiramatsu Fixes: 76929ab51f0ee ("kselftests/ftrace: Add hist trigger testcases") Signed-off-by: Steven Rostedt --- .../testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc | 9 ++++----- tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc | 9 ++++----- .../testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc | 9 ++++----- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc index c2b61c4fda11..0bf5085281f3 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - -FEATURE=`grep hist events/sched/sched_process_fork/trigger` -if [ -z "$FEATURE" ]; then +if [ ! -f events/sched/sched_process_fork/hist ]; then echo "hist trigger is not supported" exit_unsupported fi +reset_tracer +do_reset + echo "Test histogram with execname modifier" echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc index b2902d42a537..a00184cd9c95 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc @@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - -FEATURE=`grep hist events/sched/sched_process_fork/trigger` -if [ -z "$FEATURE" ]; then +if [ ! -f events/sched/sched_process_fork/hist ]; then echo "hist trigger is not supported" exit_unsupported fi +reset_tracer +do_reset + echo "Test histogram basic tigger" echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc index 03c4a46561fc..3478b00ead57 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc @@ -23,15 +23,14 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - -FEATURE=`grep hist events/sched/sched_process_fork/trigger` -if [ -z "$FEATURE" ]; then +if [ ! -f events/sched/sched_process_fork/hist ]; then echo "hist trigger is not supported" exit_unsupported fi +reset_tracer +do_reset + reset_trigger echo "Test histogram multiple tiggers" -- cgit From a7b50abc90afb2e3c27e1bd212643cc53eaf0b60 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Fri, 24 Jun 2016 14:48:40 -0700 Subject: selftests/vm/compaction_test: fix write to restore nr_hugepages The write at the end of the test to restore nr_hugepages to its previous value is failing. This is because it is trying to write the number of bytes in the char array as opposed to the number of bytes in the string. Link: http://lkml.kernel.org/r/1465331205-3284-1-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Cc: Shuah Khan Cc: Sri Jayaramappa Cc: Eric B Munson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/compaction_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index 932ff577ffc0..00c4f65d12da 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -136,7 +136,7 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) printf("No of huge pages allocated = %d\n", (atoi(nr_hugepages))); - if (write(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) + if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) != strlen(initial_nr_hugepages)) { perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); goto close_fd; -- cgit From 7c5b7239465932400ee0825bcc90624717c1af19 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 24 Jun 2016 14:50:21 -0700 Subject: tools/vm/slabinfo: fix spelling mistake: "Ocurrences" -> "Occurrences" trivial fix to spelling mistake Link: http://lkml.kernel.org/r/1466672144-831-1-git-send-email-colin.king@canonical.com Signed-off-by: Colin Ian King Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/vm/slabinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c index 1889163f2f05..7cf6e1769903 100644 --- a/tools/vm/slabinfo.c +++ b/tools/vm/slabinfo.c @@ -492,7 +492,7 @@ static void slab_stats(struct slabinfo *s) s->deactivate_to_head + s->deactivate_to_tail + s->deactivate_bypass; if (total) { - printf("\nSlab Deactivation Ocurrences %%\n"); + printf("\nSlab Deactivation Occurrences %%\n"); printf("-------------------------------------------------\n"); printf("Slab full %7lu %3lu%%\n", s->deactivate_full, (s->deactivate_full * 100) / total); -- cgit