diff options
292 files changed, 8560 insertions, 3675 deletions
diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX index d111e3b23db0..d18ecd827c40 100644 --- a/Documentation/block/00-INDEX +++ b/Documentation/block/00-INDEX @@ -3,15 +3,21 @@ biodoc.txt - Notes on the Generic Block Layer Rewrite in Linux 2.5 capability.txt - - Generic Block Device Capability (/sys/block/<disk>/capability) + - Generic Block Device Capability (/sys/block/<device>/capability) +cfq-iosched.txt + - CFQ IO scheduler tunables +data-integrity.txt + - Block data integrity deadline-iosched.txt - Deadline IO scheduler tunables ioprio.txt - Block io priorities (in CFQ scheduler) +queue-sysfs.txt + - Queue's sysfs entries request.txt - The members of struct request (in include/linux/blkdev.h) stat.txt - - Block layer statistics in /sys/block/<dev>/stat + - Block layer statistics in /sys/block/<device>/stat switching-sched.txt - Switching I/O schedulers at runtime writeback_cache_control.txt diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt index 6d670f570451..d89b4fe724d7 100644 --- a/Documentation/block/cfq-iosched.txt +++ b/Documentation/block/cfq-iosched.txt @@ -1,3 +1,14 @@ +CFQ (Complete Fairness Queueing) +=============================== + +The main aim of CFQ scheduler is to provide a fair allocation of the disk +I/O bandwidth for all the processes which request an I/O operation. + +CFQ maintains the per process queue for the processes which request I/O +operation (synchronous requests). In case of asynchronous requests, all the +requests from all the processes are batched together according to their +process's I/O priority. + CFQ ioscheduler tunables ======================== @@ -25,6 +36,72 @@ there are multiple spindles behind single LUN (Host based hardware RAID controller or for storage arrays), setting slice_idle=0 might end up in better throughput and acceptable latencies. +back_seek_max +------------- +This specifies, given in Kbytes, the maximum "distance" for backward seeking. 
+The distance is the amount of space from the current head location to the +sectors that are backward in terms of distance. + +This parameter allows the scheduler to anticipate requests in the "backward" +direction and consider them as being the "next" if they are within this +distance from the current head location. + +back_seek_penalty +----------------- +This parameter is used to compute the cost of backward seeking. If the +backward distance of request is just 1/back_seek_penalty from a "front" +request, then the seeking cost of two requests is considered equivalent. + +So scheduler will not bias toward one or the other request (otherwise scheduler +will bias toward front request). Default value of back_seek_penalty is 2. + +fifo_expire_async +----------------- +This parameter is used to set the timeout of asynchronous requests. Default +value of this is 248ms. + +fifo_expire_sync +---------------- +This parameter is used to set the timeout of synchronous requests. Default +value of this is 124ms. In case to favor synchronous requests over asynchronous +one, this value should be decreased relative to fifo_expire_async. + +slice_async +----------- +This parameter is same as of slice_sync but for asynchronous queue. The +default value is 40ms. + +slice_async_rq +-------------- +This parameter is used to limit the dispatching of asynchronous request to +device request queue in queue's slice time. The maximum number of request that +are allowed to be dispatched also depends upon the io priority. Default value +for this is 2. + +slice_sync +---------- +When a queue is selected for execution, the queues IO requests are only +executed for a certain amount of time(time_slice) before switching to another +queue. This parameter is used to calculate the time slice of synchronous +queue. + +time_slice is computed using the below equation:- +time_slice = slice_sync + (slice_sync/5 * (4 - prio)). 
To increase the +time_slice of synchronous queue, increase the value of slice_sync. Default +value is 100ms. + +quantum +------- +This specifies the number of request dispatched to the device queue. In a +queue's time slice, a request will not be dispatched if the number of request +in the device exceeds this parameter. This parameter is used for synchronous +request. + +In case of storage with several disk, this setting can limit the parallel +processing of request. Therefore, increasing the value can improve the +performance although this can cause the latency of some I/O to increase due +to more number of requests. + CFQ IOPS Mode for group scheduling =================================== Basic CFQ design is to provide priority based time slices. Higher priority diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index 6518a55273e7..e54ac1d53403 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt @@ -9,20 +9,71 @@ These files are the ones found in the /sys/block/xxx/queue/ directory. Files denoted with a RO postfix are readonly and the RW postfix means read-write. +add_random (RW) +---------------- +This file allows to turn off the disk entropy contribution. Default +value of this file is '1'(on). + +discard_granularity (RO) +----------------------- +This shows the size of internal allocation of the device in bytes, if +reported by the device. A value of '0' means device does not support +the discard functionality. + +discard_max_bytes (RO) +---------------------- +Devices that support discard functionality may have internal limits on +the number of bytes that can be trimmed or unmapped in a single operation. +The discard_max_bytes parameter is set by the device driver to the maximum +number of bytes that can be discarded in a single operation. Discard +requests issued to the device must not exceed this limit. 
A discard_max_bytes +value of 0 means that the device does not support discard functionality. + +discard_zeroes_data (RO) +------------------------ +When read, this file will show if the discarded blocks are zeroed by the +device or not. If its value is '1' the blocks are zeroed otherwise not. + hw_sector_size (RO) ------------------- This is the hardware sector size of the device, in bytes. +iostats (RW) +------------- +This file is used to control (on/off) the iostats accounting of the +disk. + +logical_block_size (RO) +----------------------- +This is the logical block size of the device, in bytes. + max_hw_sectors_kb (RO) ---------------------- This is the maximum number of kilobytes supported in a single data transfer. +max_integrity_segments (RO) +--------------------------- +When read, this file shows the max limit of integrity segments as +set by block layer which a hardware controller can handle. + max_sectors_kb (RW) ------------------- This is the maximum number of kilobytes that the block layer will allow for a filesystem request. Must be smaller than or equal to the maximum size allowed by the hardware. +max_segments (RO) +----------------- +Maximum number of segments of the device. + +max_segment_size (RO) +--------------------- +Maximum segment size of the device. + +minimum_io_size (RO) +-------------------- +This is the smallest preferred io size reported by the device. + nomerges (RW) ------------- This enables the user to disable the lookup logic involved with IO @@ -45,11 +96,24 @@ per-block-cgroup request pool. IOW, if there are N block cgroups, each request queue may have upto N request pools, each independently regulated by nr_requests. +optimal_io_size (RO) +-------------------- +This is the optimal io size reported by the device. + +physical_block_size (RO) +------------------------ +This is the physical block size of device, in bytes. 
+ read_ahead_kb (RW) ------------------ Maximum number of kilobytes to read-ahead for filesystems on this block device. +rotational (RW) +--------------- +This file is used to show whether the device is of rotational type or +non-rotational type. + rq_affinity (RW) ---------------- If this option is '1', the block layer will migrate request completions to the diff --git a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt index 70cd49b1caa8..1dd622546d06 100644 --- a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt +++ b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt @@ -10,8 +10,8 @@ Required properties: - compatible : Should be "fsl,<chip>-esdhc" Optional properties: -- fsl,cd-internal : Indicate to use controller internal card detection -- fsl,wp-internal : Indicate to use controller internal write protection +- fsl,cd-controller : Indicate to use controller internal card detection +- fsl,wp-controller : Indicate to use controller internal write protection Examples: @@ -19,8 +19,8 @@ esdhc@70004000 { compatible = "fsl,imx51-esdhc"; reg = <0x70004000 0x4000>; interrupts = <1>; - fsl,cd-internal; - fsl,wp-internal; + fsl,cd-controller; + fsl,wp-controller; }; esdhc@70008000 { @@ -609,7 +609,11 @@ KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) endif ifdef CONFIG_FUNCTION_TRACER -KBUILD_CFLAGS += -pg +ifdef CONFIG_HAVE_FENTRY +CC_USING_FENTRY := $(call cc-option, -mfentry -DCC_USING_FENTRY) +endif +KBUILD_CFLAGS += -pg $(CC_USING_FENTRY) +KBUILD_AFLAGS += $(CC_USING_FENTRY) ifdef CONFIG_DYNAMIC_FTRACE ifdef CONFIG_HAVE_C_RECORDMCOUNT BUILD_C_RECORDMCOUNT := y diff --git a/arch/Kconfig b/arch/Kconfig index 72f2fa189cc5..2a83a3f6a615 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -222,6 +222,19 @@ config HAVE_PERF_EVENTS_NMI subsystem. Also has support for calculating CPU cycle events to determine how many clock cycles in a given period. 
+config HAVE_PERF_REGS + bool + help + Support selective register dumps for perf events. This includes + bit-mapping of each registers and a unique architecture id. + +config HAVE_PERF_USER_STACK_DUMP + bool + help + Support user stack dumps for perf event samples. This needs + access to the user stack pointer which is not unified across + architectures. + config HAVE_ARCH_JUMP_LABEL bool diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 6d6e18fee9fe..c5f9ae5dbd1a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2144,6 +2144,7 @@ source "drivers/cpufreq/Kconfig" config CPU_FREQ_IMX tristate "CPUfreq driver for i.MX CPUs" depends on ARCH_MXC && CPU_FREQ + select CPU_FREQ_TABLE help This enables the CPUfreq driver for i.MX CPUs. diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 59509c48d7e5..bd0cff3f808c 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -154,5 +154,10 @@ #size-cells = <0>; ti,hwmods = "i2c3"; }; + + wdt2: wdt@44e35000 { + compatible = "ti,omap3-wdt"; + ti,hwmods = "wd_timer2"; + }; }; }; diff --git a/arch/arm/boot/dts/imx51-babbage.dts b/arch/arm/boot/dts/imx51-babbage.dts index cd86177a3ea2..59d9789e5508 100644 --- a/arch/arm/boot/dts/imx51-babbage.dts +++ b/arch/arm/boot/dts/imx51-babbage.dts @@ -25,8 +25,8 @@ aips@70000000 { /* aips-1 */ spba@70000000 { esdhc@70004000 { /* ESDHC1 */ - fsl,cd-internal; - fsl,wp-internal; + fsl,cd-controller; + fsl,wp-controller; status = "okay"; }; diff --git a/arch/arm/boot/dts/kirkwood-iconnect.dts b/arch/arm/boot/dts/kirkwood-iconnect.dts index 52d947045106..f8ca6fa88192 100644 --- a/arch/arm/boot/dts/kirkwood-iconnect.dts +++ b/arch/arm/boot/dts/kirkwood-iconnect.dts @@ -41,9 +41,13 @@ }; power-blue { label = "power:blue"; - gpios = <&gpio1 11 0>; + gpios = <&gpio1 10 0>; linux,default-trigger = "timer"; }; + power-red { + label = "power:red"; + gpios = <&gpio1 11 0>; + }; usb1 { label = "usb1:blue"; gpios = <&gpio1 12 0>; diff --git 
a/arch/arm/boot/dts/twl6030.dtsi b/arch/arm/boot/dts/twl6030.dtsi index 3b2f3510d7eb..d351b27d7213 100644 --- a/arch/arm/boot/dts/twl6030.dtsi +++ b/arch/arm/boot/dts/twl6030.dtsi @@ -66,6 +66,7 @@ vcxio: regulator@8 { compatible = "ti,twl6030-vcxio"; + regulator-always-on; }; vusb: regulator@9 { @@ -74,10 +75,12 @@ v1v8: regulator@10 { compatible = "ti,twl6030-v1v8"; + regulator-always-on; }; v2v1: regulator@11 { compatible = "ti,twl6030-v2v1"; + regulator-always-on; }; clk32kg: regulator@12 { diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig index 2d4f661d1cf6..da6845493caa 100644 --- a/arch/arm/configs/u8500_defconfig +++ b/arch/arm/configs/u8500_defconfig @@ -86,6 +86,7 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_LM3530=y CONFIG_LEDS_LP5521=y +CONFIG_LEDS_GPIO=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_AB8500=y CONFIG_RTC_DRV_PL031=y diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c index 4db5de54b6a7..6321567d8eaa 100644 --- a/arch/arm/mach-dove/common.c +++ b/arch/arm/mach-dove/common.c @@ -102,7 +102,8 @@ void __init dove_ehci1_init(void) void __init dove_ge00_init(struct mv643xx_eth_platform_data *eth_data) { orion_ge00_init(eth_data, DOVE_GE00_PHYS_BASE, - IRQ_DOVE_GE00_SUM, IRQ_DOVE_GE00_ERR); + IRQ_DOVE_GE00_SUM, IRQ_DOVE_GE00_ERR, + 1600); } /***************************************************************************** diff --git a/arch/arm/mach-exynos/mach-origen.c b/arch/arm/mach-exynos/mach-origen.c index 5ca80307d6d7..4e574c24581c 100644 --- a/arch/arm/mach-exynos/mach-origen.c +++ b/arch/arm/mach-exynos/mach-origen.c @@ -42,6 +42,7 @@ #include <plat/backlight.h> #include <plat/fb.h> #include <plat/mfc.h> +#include <plat/hdmi.h> #include <mach/ohci.h> #include <mach/map.h> @@ -734,6 +735,11 @@ static void __init origen_bt_setup(void) s3c_gpio_setpull(EXYNOS4_GPX2(2), S3C_GPIO_PULL_NONE); } +/* I2C module and id for HDMIPHY */ +static struct i2c_board_info hdmiphy_info = { + 
I2C_BOARD_INFO("hdmiphy-exynos4210", 0x38), +}; + static void s5p_tv_setup(void) { /* Direct HPD to HDMI chip */ @@ -781,6 +787,7 @@ static void __init origen_machine_init(void) s5p_tv_setup(); s5p_i2c_hdmiphy_set_platdata(NULL); + s5p_hdmi_set_platdata(&hdmiphy_info, NULL, 0); #ifdef CONFIG_DRM_EXYNOS s5p_device_fimd0.dev.platform_data = &drm_fimd_pdata; diff --git a/arch/arm/mach-exynos/mach-smdkv310.c b/arch/arm/mach-exynos/mach-smdkv310.c index 3cfa688d274a..73f2bce097e1 100644 --- a/arch/arm/mach-exynos/mach-smdkv310.c +++ b/arch/arm/mach-exynos/mach-smdkv310.c @@ -40,6 +40,7 @@ #include <plat/mfc.h> #include <plat/ehci.h> #include <plat/clock.h> +#include <plat/hdmi.h> #include <mach/map.h> #include <mach/ohci.h> @@ -354,6 +355,11 @@ static struct platform_pwm_backlight_data smdkv310_bl_data = { .pwm_period_ns = 1000, }; +/* I2C module and id for HDMIPHY */ +static struct i2c_board_info hdmiphy_info = { + I2C_BOARD_INFO("hdmiphy-exynos4210", 0x38), +}; + static void s5p_tv_setup(void) { /* direct HPD to HDMI chip */ @@ -388,6 +394,7 @@ static void __init smdkv310_machine_init(void) s5p_tv_setup(); s5p_i2c_hdmiphy_set_platdata(NULL); + s5p_hdmi_set_platdata(&hdmiphy_info, NULL, 0); samsung_keypad_set_platdata(&smdkv310_keypad_data); diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 07f7c226e4cf..d004d37ad9d8 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -9,7 +9,8 @@ obj-$(CONFIG_SOC_IMX27) += clk-imx27.o mm-imx27.o ehci-imx27.o obj-$(CONFIG_SOC_IMX31) += mm-imx3.o cpu-imx31.o clk-imx31.o iomux-imx31.o ehci-imx31.o pm-imx3.o obj-$(CONFIG_SOC_IMX35) += mm-imx3.o cpu-imx35.o clk-imx35.o ehci-imx35.o pm-imx3.o -obj-$(CONFIG_SOC_IMX5) += cpu-imx5.o mm-imx5.o clk-imx51-imx53.o ehci-imx5.o pm-imx5.o cpu_op-mx51.o +imx5-pm-$(CONFIG_PM) += pm-imx5.o +obj-$(CONFIG_SOC_IMX5) += cpu-imx5.o mm-imx5.o clk-imx51-imx53.o ehci-imx5.o $(imx5-pm-y) cpu_op-mx51.o obj-$(CONFIG_COMMON_CLK) += clk-pllv1.o clk-pllv2.o clk-pllv3.o 
clk-gate2.o \ clk-pfd.o clk-busy.o @@ -70,14 +71,13 @@ obj-$(CONFIG_DEBUG_LL) += lluart.o obj-$(CONFIG_HAVE_IMX_GPC) += gpc.o obj-$(CONFIG_HAVE_IMX_MMDC) += mmdc.o obj-$(CONFIG_HAVE_IMX_SRC) += src.o -obj-$(CONFIG_CPU_V7) += head-v7.o -AFLAGS_head-v7.o :=-Wa,-march=armv7-a -obj-$(CONFIG_SMP) += platsmp.o +AFLAGS_headsmp.o :=-Wa,-march=armv7-a +obj-$(CONFIG_SMP) += headsmp.o platsmp.o obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o obj-$(CONFIG_SOC_IMX6Q) += clk-imx6q.o mach-imx6q.o ifeq ($(CONFIG_PM),y) -obj-$(CONFIG_SOC_IMX6Q) += pm-imx6q.o +obj-$(CONFIG_SOC_IMX6Q) += pm-imx6q.o headsmp.o endif # i.MX5 based machines diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index ea89520b6e22..4233d9e3531d 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -152,7 +152,7 @@ enum mx6q_clks { ssi2, ssi3, uart_ipg, uart_serial, usboh3, usdhc1, usdhc2, usdhc3, usdhc4, vdo_axi, vpu_axi, cko1, pll1_sys, pll2_bus, pll3_usb_otg, pll4_audio, pll5_video, pll6_mlb, pll7_usb_host, pll8_enet, ssi1_ipg, - ssi2_ipg, ssi3_ipg, rom, usbphy1, usbphy2, + ssi2_ipg, ssi3_ipg, rom, usbphy1, usbphy2, ldb_di0_div_3_5, ldb_di1_div_3_5, clk_max }; @@ -288,8 +288,10 @@ int __init mx6q_clocks_init(void) clk[gpu3d_shader] = imx_clk_divider("gpu3d_shader", "gpu3d_shader_sel", base + 0x18, 29, 3); clk[ipu1_podf] = imx_clk_divider("ipu1_podf", "ipu1_sel", base + 0x3c, 11, 3); clk[ipu2_podf] = imx_clk_divider("ipu2_podf", "ipu2_sel", base + 0x3c, 16, 3); - clk[ldb_di0_podf] = imx_clk_divider("ldb_di0_podf", "ldb_di0_sel", base + 0x20, 10, 1); - clk[ldb_di1_podf] = imx_clk_divider("ldb_di1_podf", "ldb_di1_sel", base + 0x20, 11, 1); + clk[ldb_di0_div_3_5] = imx_clk_fixed_factor("ldb_di0_div_3_5", "ldb_di0_sel", 2, 7); + clk[ldb_di0_podf] = imx_clk_divider("ldb_di0_podf", "ldb_di0_div_3_5", base + 0x20, 10, 1); + clk[ldb_di1_div_3_5] = imx_clk_fixed_factor("ldb_di1_div_3_5", "ldb_di1_sel", 2, 7); + clk[ldb_di1_podf] = imx_clk_divider("ldb_di1_podf", 
"ldb_di1_div_3_5", base + 0x20, 11, 1); clk[ipu1_di0_pre] = imx_clk_divider("ipu1_di0_pre", "ipu1_di0_pre_sel", base + 0x34, 3, 3); clk[ipu1_di1_pre] = imx_clk_divider("ipu1_di1_pre", "ipu1_di1_pre_sel", base + 0x34, 12, 3); clk[ipu2_di0_pre] = imx_clk_divider("ipu2_di0_pre", "ipu2_di0_pre_sel", base + 0x38, 3, 3); diff --git a/arch/arm/mach-imx/head-v7.S b/arch/arm/mach-imx/headsmp.S index 7e49deb128a4..7e49deb128a4 100644 --- a/arch/arm/mach-imx/head-v7.S +++ b/arch/arm/mach-imx/headsmp.S diff --git a/arch/arm/mach-imx/hotplug.c b/arch/arm/mach-imx/hotplug.c index 20ed2d56c1af..f8f7437c83b8 100644 --- a/arch/arm/mach-imx/hotplug.c +++ b/arch/arm/mach-imx/hotplug.c @@ -42,22 +42,6 @@ static inline void cpu_enter_lowpower(void) : "cc"); } -static inline void cpu_leave_lowpower(void) -{ - unsigned int v; - - asm volatile( - "mrc p15, 0, %0, c1, c0, 0\n" - " orr %0, %0, %1\n" - " mcr p15, 0, %0, c1, c0, 0\n" - " mrc p15, 0, %0, c1, c0, 1\n" - " orr %0, %0, %2\n" - " mcr p15, 0, %0, c1, c0, 1\n" - : "=&r" (v) - : "Ir" (CR_C), "Ir" (0x40) - : "cc"); -} - /* * platform-specific code to shutdown a CPU * @@ -67,11 +51,10 @@ void platform_cpu_die(unsigned int cpu) { cpu_enter_lowpower(); imx_enable_cpu(cpu, false); - cpu_do_idle(); - cpu_leave_lowpower(); - /* We should never return from idle */ - panic("cpu %d unexpectedly exit from shutdown\n", cpu); + /* spin here until hardware takes it down */ + while (1) + ; } int platform_cpu_disable(unsigned int cpu) diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 5ec0608f2a76..045b3f6a387d 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -71,7 +71,7 @@ soft: /* For imx6q sabrelite board: set KSZ9021RN RGMII pad skew */ static int ksz9021rn_phy_fixup(struct phy_device *phydev) { - if (IS_ENABLED(CONFIG_PHYLIB)) { + if (IS_BUILTIN(CONFIG_PHYLIB)) { /* min rx data delay */ phy_write(phydev, 0x0b, 0x8105); phy_write(phydev, 0x0c, 0x0000); @@ -112,7 +112,7 @@ 
put_clk: static void __init imx6q_sabrelite_init(void) { - if (IS_ENABLED(CONFIG_PHYLIB)) + if (IS_BUILTIN(CONFIG_PHYLIB)) phy_register_fixup_for_uid(PHY_ID_KSZ9021, MICREL_PHY_ID_MASK, ksz9021rn_phy_fixup); imx6q_sabrelite_cko1_setup(); diff --git a/arch/arm/mach-kirkwood/Makefile.boot b/arch/arm/mach-kirkwood/Makefile.boot index a5717558ee89..a13299d758e1 100644 --- a/arch/arm/mach-kirkwood/Makefile.boot +++ b/arch/arm/mach-kirkwood/Makefile.boot @@ -7,7 +7,8 @@ dtb-$(CONFIG_MACH_DLINK_KIRKWOOD_DT) += kirkwood-dns320.dtb dtb-$(CONFIG_MACH_DLINK_KIRKWOOD_DT) += kirkwood-dns325.dtb dtb-$(CONFIG_MACH_ICONNECT_DT) += kirkwood-iconnect.dtb dtb-$(CONFIG_MACH_IB62X0_DT) += kirkwood-ib62x0.dtb -dtb-$(CONFIG_MACH_TS219_DT) += kirkwood-qnap-ts219.dtb +dtb-$(CONFIG_MACH_TS219_DT) += kirkwood-ts219-6281.dtb +dtb-$(CONFIG_MACH_TS219_DT) += kirkwood-ts219-6282.dtb dtb-$(CONFIG_MACH_GOFLEXNET_DT) += kirkwood-goflexnet.dtb dtb-$(CONFIG_MACH_LSXL_DT) += kirkwood-lschlv2.dtb dtb-$(CONFIG_MACH_LSXL_DT) += kirkwood-lsxhl.dtb diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index c4b64adcbfce..3226077735b1 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -301,7 +301,7 @@ void __init kirkwood_ge00_init(struct mv643xx_eth_platform_data *eth_data) { orion_ge00_init(eth_data, GE00_PHYS_BASE, IRQ_KIRKWOOD_GE00_SUM, - IRQ_KIRKWOOD_GE00_ERR); + IRQ_KIRKWOOD_GE00_ERR, 1600); /* The interface forgets the MAC address assigned by u-boot if the clock is turned off, so claim the clk now. 
*/ clk_prepare_enable(ge0); @@ -315,7 +315,7 @@ void __init kirkwood_ge01_init(struct mv643xx_eth_platform_data *eth_data) { orion_ge01_init(eth_data, GE01_PHYS_BASE, IRQ_KIRKWOOD_GE01_SUM, - IRQ_KIRKWOOD_GE01_ERR); + IRQ_KIRKWOOD_GE01_ERR, 1600); clk_prepare_enable(ge1); } diff --git a/arch/arm/mach-mmp/sram.c b/arch/arm/mach-mmp/sram.c index 4304f9519372..7e8a5a2e1ec7 100644 --- a/arch/arm/mach-mmp/sram.c +++ b/arch/arm/mach-mmp/sram.c @@ -68,7 +68,7 @@ static int __devinit sram_probe(struct platform_device *pdev) struct resource *res; int ret = 0; - if (!pdata && !pdata->pool_name) + if (!pdata || !pdata->pool_name) return -ENODEV; info = kzalloc(sizeof(*info), GFP_KERNEL); diff --git a/arch/arm/mach-mv78xx0/addr-map.c b/arch/arm/mach-mv78xx0/addr-map.c index 62b53d710efd..a9bc84180d21 100644 --- a/arch/arm/mach-mv78xx0/addr-map.c +++ b/arch/arm/mach-mv78xx0/addr-map.c @@ -37,7 +37,7 @@ #define WIN0_OFF(n) (BRIDGE_VIRT_BASE + 0x0000 + ((n) << 4)) #define WIN8_OFF(n) (BRIDGE_VIRT_BASE + 0x0900 + (((n) - 8) << 4)) -static void __init __iomem *win_cfg_base(int win) +static void __init __iomem *win_cfg_base(const struct orion_addr_map_cfg *cfg, int win) { /* * Find the control register base address for this window. 
diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c index b4c53b846c9c..3057f7d4329a 100644 --- a/arch/arm/mach-mv78xx0/common.c +++ b/arch/arm/mach-mv78xx0/common.c @@ -213,7 +213,8 @@ void __init mv78xx0_ge00_init(struct mv643xx_eth_platform_data *eth_data) { orion_ge00_init(eth_data, GE00_PHYS_BASE, IRQ_MV78XX0_GE00_SUM, - IRQ_MV78XX0_GE_ERR); + IRQ_MV78XX0_GE_ERR, + MV643XX_TX_CSUM_DEFAULT_LIMIT); } @@ -224,7 +225,8 @@ void __init mv78xx0_ge01_init(struct mv643xx_eth_platform_data *eth_data) { orion_ge01_init(eth_data, GE01_PHYS_BASE, IRQ_MV78XX0_GE01_SUM, - NO_IRQ); + NO_IRQ, + MV643XX_TX_CSUM_DEFAULT_LIMIT); } diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index dd2db025f778..fcd4e85c4ddc 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -62,13 +62,14 @@ config ARCH_OMAP4 select PM_OPP if PM select USB_ARCH_HAS_EHCI if USB_SUPPORT select ARM_CPU_SUSPEND if PM - select ARCH_NEEDS_CPU_IDLE_COUPLED + select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP config SOC_OMAP5 bool "TI OMAP5" select CPU_V7 select ARM_GIC select HAVE_SMP + select ARM_CPU_SUSPEND if PM comment "OMAP Core Type" depends on ARCH_OMAP2 diff --git a/arch/arm/mach-omap2/board-igep0020.c b/arch/arm/mach-omap2/board-igep0020.c index 74915295482e..28214483aaba 100644 --- a/arch/arm/mach-omap2/board-igep0020.c +++ b/arch/arm/mach-omap2/board-igep0020.c @@ -554,6 +554,8 @@ static const struct usbhs_omap_board_data igep3_usbhs_bdata __initconst = { #ifdef CONFIG_OMAP_MUX static struct omap_board_mux board_mux[] __initdata = { + /* SMSC9221 LAN Controller ETH IRQ (GPIO_176) */ + OMAP3_MUX(MCSPI1_CS2, OMAP_MUX_MODE4 | OMAP_PIN_INPUT), { .reg_offset = OMAP_MUX_TERMINATOR }, }; #endif diff --git a/arch/arm/mach-omap2/board-omap3evm.c b/arch/arm/mach-omap2/board-omap3evm.c index ef230a0eb5eb..0d362e9f9cb9 100644 --- a/arch/arm/mach-omap2/board-omap3evm.c +++ b/arch/arm/mach-omap2/board-omap3evm.c @@ -58,6 +58,7 @@ #include "hsmmc.h" 
#include "common-board-devices.h" +#define OMAP3_EVM_TS_GPIO 175 #define OMAP3_EVM_EHCI_VBUS 22 #define OMAP3_EVM_EHCI_SELECT 61 diff --git a/arch/arm/mach-omap2/common-board-devices.c b/arch/arm/mach-omap2/common-board-devices.c index 14734746457c..c1875862679f 100644 --- a/arch/arm/mach-omap2/common-board-devices.c +++ b/arch/arm/mach-omap2/common-board-devices.c @@ -35,16 +35,6 @@ static struct omap2_mcspi_device_config ads7846_mcspi_config = { .turbo_mode = 0, }; -/* - * ADS7846 driver maybe request a gpio according to the value - * of pdata->get_pendown_state, but we have done this. So set - * get_pendown_state to avoid twice gpio requesting. - */ -static int omap3_get_pendown_state(void) -{ - return !gpio_get_value(OMAP3_EVM_TS_GPIO); -} - static struct ads7846_platform_data ads7846_config = { .x_max = 0x0fff, .y_max = 0x0fff, @@ -55,7 +45,6 @@ static struct ads7846_platform_data ads7846_config = { .debounce_rep = 1, .gpio_pendown = -EINVAL, .keep_vref_on = 1, - .get_pendown_state = &omap3_get_pendown_state, }; static struct spi_board_info ads7846_spi_board_info __initdata = { diff --git a/arch/arm/mach-omap2/common-board-devices.h b/arch/arm/mach-omap2/common-board-devices.h index 4c4ef6a6166b..a0b4a42836ab 100644 --- a/arch/arm/mach-omap2/common-board-devices.h +++ b/arch/arm/mach-omap2/common-board-devices.h @@ -4,7 +4,6 @@ #include "twl-common.h" #define NAND_BLOCK_SIZE SZ_128K -#define OMAP3_EVM_TS_GPIO 175 struct mtd_partition; struct ads7846_platform_data; diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index ee05e193fc61..288bee6cbb76 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -238,8 +238,9 @@ int __init omap4_idle_init(void) for_each_cpu(cpu_id, cpu_online_mask) { dev = &per_cpu(omap4_idle_dev, cpu_id); dev->cpu = cpu_id; +#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED dev->coupled_cpus = *cpu_online_mask; - +#endif cpuidle_register_driver(&omap4_idle_driver); if 
(cpuidle_register_device(dev)) { diff --git a/arch/arm/mach-omap2/mux.h b/arch/arm/mach-omap2/mux.h index 471e62a74a16..76f9b3c2f586 100644 --- a/arch/arm/mach-omap2/mux.h +++ b/arch/arm/mach-omap2/mux.h @@ -127,7 +127,6 @@ struct omap_mux_partition { * @gpio: GPIO number * @muxnames: available signal modes for a ball * @balls: available balls on the package - * @partition: mux partition */ struct omap_mux { u16 reg_offset; diff --git a/arch/arm/mach-omap2/opp4xxx_data.c b/arch/arm/mach-omap2/opp4xxx_data.c index 2293ba27101b..c95415da23c2 100644 --- a/arch/arm/mach-omap2/opp4xxx_data.c +++ b/arch/arm/mach-omap2/opp4xxx_data.c @@ -94,7 +94,7 @@ int __init omap4_opp_init(void) { int r = -ENODEV; - if (!cpu_is_omap44xx()) + if (!cpu_is_omap443x()) return r; r = omap_init_opp_table(omap44xx_opp_def_list, diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index e4fc88c65dbd..05bd8f02723f 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -272,21 +272,16 @@ void omap_sram_idle(void) per_next_state = pwrdm_read_next_pwrst(per_pwrdm); core_next_state = pwrdm_read_next_pwrst(core_pwrdm); - if (mpu_next_state < PWRDM_POWER_ON) { - pwrdm_pre_transition(mpu_pwrdm); - pwrdm_pre_transition(neon_pwrdm); - } + pwrdm_pre_transition(NULL); /* PER */ if (per_next_state < PWRDM_POWER_ON) { - pwrdm_pre_transition(per_pwrdm); per_going_off = (per_next_state == PWRDM_POWER_OFF) ? 
1 : 0; omap2_gpio_prepare_for_idle(per_going_off); } /* CORE */ if (core_next_state < PWRDM_POWER_ON) { - pwrdm_pre_transition(core_pwrdm); if (core_next_state == PWRDM_POWER_OFF) { omap3_core_save_context(); omap3_cm_save_context(); @@ -339,20 +334,14 @@ void omap_sram_idle(void) omap2_prm_clear_mod_reg_bits(OMAP3430_AUTO_OFF_MASK, OMAP3430_GR_MOD, OMAP3_PRM_VOLTCTRL_OFFSET); - pwrdm_post_transition(core_pwrdm); } omap3_intc_resume_idle(); + pwrdm_post_transition(NULL); + /* PER */ - if (per_next_state < PWRDM_POWER_ON) { + if (per_next_state < PWRDM_POWER_ON) omap2_gpio_resume_after_idle(); - pwrdm_post_transition(per_pwrdm); - } - - if (mpu_next_state < PWRDM_POWER_ON) { - pwrdm_post_transition(mpu_pwrdm); - pwrdm_post_transition(neon_pwrdm); - } } static void omap3_pm_idle(void) diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S index 9f6b83d1b193..91e71d8f46f0 100644 --- a/arch/arm/mach-omap2/sleep44xx.S +++ b/arch/arm/mach-omap2/sleep44xx.S @@ -56,9 +56,13 @@ ppa_por_params: * The restore function pointer is stored at CPUx_WAKEUP_NS_PA_ADDR_OFFSET. * It returns to the caller for CPU INACTIVE and ON power states or in case * CPU failed to transition to targeted OFF/DORMANT state. + * + * omap4_finish_suspend() calls v7_flush_dcache_all() which doesn't save + * stack frame and it expects the caller to take care of it. Hence the entire + * stack frame is saved to avoid possible stack corruption. 
*/ ENTRY(omap4_finish_suspend) - stmfd sp!, {lr} + stmfd sp!, {r4-r12, lr} cmp r0, #0x0 beq do_WFI @ No lowpower state, jump to WFI @@ -226,7 +230,7 @@ scu_gp_clear: skip_scu_gp_clear: isb dsb - ldmfd sp!, {pc} + ldmfd sp!, {r4-r12, pc} ENDPROC(omap4_finish_suspend) /* diff --git a/arch/arm/mach-omap2/twl-common.c b/arch/arm/mach-omap2/twl-common.c index de47f170ba50..db5ff6642375 100644 --- a/arch/arm/mach-omap2/twl-common.c +++ b/arch/arm/mach-omap2/twl-common.c @@ -67,6 +67,7 @@ void __init omap_pmic_init(int bus, u32 clkrate, const char *pmic_type, int pmic_irq, struct twl4030_platform_data *pmic_data) { + omap_mux_init_signal("sys_nirq", OMAP_PIN_INPUT_PULLUP | OMAP_PIN_OFF_WAKEUPENABLE); strncpy(pmic_i2c_board_info.type, pmic_type, sizeof(pmic_i2c_board_info.type)); pmic_i2c_board_info.irq = pmic_irq; diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index 9148b229d0de..410291c67666 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -109,7 +109,8 @@ void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data) { orion_ge00_init(eth_data, ORION5X_ETH_PHYS_BASE, IRQ_ORION5X_ETH_SUM, - IRQ_ORION5X_ETH_ERR); + IRQ_ORION5X_ETH_ERR, + MV643XX_TX_CSUM_DEFAULT_LIMIT); } diff --git a/arch/arm/mach-s3c24xx/include/mach/dma.h b/arch/arm/mach-s3c24xx/include/mach/dma.h index 454831b66037..ee99fd56c043 100644 --- a/arch/arm/mach-s3c24xx/include/mach/dma.h +++ b/arch/arm/mach-s3c24xx/include/mach/dma.h @@ -24,7 +24,8 @@ */ enum dma_ch { - DMACH_XD0, + DMACH_DT_PROP = -1, /* not yet supported, do not use */ + DMACH_XD0 = 0, DMACH_XD1, DMACH_SDI, DMACH_SPI0, diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig index c013bbf79cac..53d3d46dec12 100644 --- a/arch/arm/mach-ux500/Kconfig +++ b/arch/arm/mach-ux500/Kconfig @@ -41,7 +41,6 @@ config MACH_HREFV60 config MACH_SNOWBALL bool "U8500 Snowball platform" select MACH_MOP500 - select LEDS_GPIO help Include support for the snowball 
development platform. diff --git a/arch/arm/mach-ux500/board-mop500-msp.c b/arch/arm/mach-ux500/board-mop500-msp.c index 996048038743..df15646036aa 100644 --- a/arch/arm/mach-ux500/board-mop500-msp.c +++ b/arch/arm/mach-ux500/board-mop500-msp.c @@ -191,9 +191,9 @@ static struct platform_device *db8500_add_msp_i2s(struct device *parent, return pdev; } -/* Platform device for ASoC U8500 machine */ -static struct platform_device snd_soc_u8500 = { - .name = "snd-soc-u8500", +/* Platform device for ASoC MOP500 machine */ +static struct platform_device snd_soc_mop500 = { + .name = "snd-soc-mop500", .id = 0, .dev = { .platform_data = NULL, @@ -227,8 +227,8 @@ int mop500_msp_init(struct device *parent) { struct platform_device *msp1; - pr_info("%s: Register platform-device 'snd-soc-u8500'.\n", __func__); - platform_device_register(&snd_soc_u8500); + pr_info("%s: Register platform-device 'snd-soc-mop500'.\n", __func__); + platform_device_register(&snd_soc_mop500); pr_info("Initialize MSP I2S-devices.\n"); db8500_add_msp_i2s(parent, 0, U8500_MSP0_BASE, IRQ_DB8500_MSP0, diff --git a/arch/arm/mach-ux500/board-mop500.c b/arch/arm/mach-ux500/board-mop500.c index 8674a890fd1c..a534d8880de1 100644 --- a/arch/arm/mach-ux500/board-mop500.c +++ b/arch/arm/mach-ux500/board-mop500.c @@ -797,6 +797,7 @@ static void __init u8500_init_machine(void) ARRAY_SIZE(mop500_platform_devs)); mop500_sdi_init(parent); + mop500_msp_init(parent); i2c0_devs = ARRAY_SIZE(mop500_i2c0_devices); i2c_register_board_info(0, mop500_i2c0_devices, i2c0_devs); i2c_register_board_info(2, mop500_i2c2_devices, @@ -804,6 +805,8 @@ static void __init u8500_init_machine(void) mop500_uib_init(); + } else if (of_machine_is_compatible("calaosystems,snowball-a9500")) { + mop500_msp_init(parent); } else if (of_machine_is_compatible("st-ericsson,hrefv60+")) { /* * The HREFv60 board removed a GPIO expander and routed @@ -815,6 +818,7 @@ static void __init u8500_init_machine(void) ARRAY_SIZE(mop500_platform_devs)); 
hrefv60_sdi_init(parent); + mop500_msp_init(parent); i2c0_devs = ARRAY_SIZE(mop500_i2c0_devices); i2c0_devs -= NUM_PRE_V60_I2C0_DEVICES; diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c index 626ad8cad7a9..938b50a33439 100644 --- a/arch/arm/plat-omap/dmtimer.c +++ b/arch/arm/plat-omap/dmtimer.c @@ -189,6 +189,7 @@ struct omap_dm_timer *omap_dm_timer_request(void) timer->reserved = 1; break; } + spin_unlock_irqrestore(&dm_timer_lock, flags); if (timer) { ret = omap_dm_timer_prepare(timer); @@ -197,7 +198,6 @@ struct omap_dm_timer *omap_dm_timer_request(void) timer = NULL; } } - spin_unlock_irqrestore(&dm_timer_lock, flags); if (!timer) pr_debug("%s: timer request failed!\n", __func__); @@ -220,6 +220,7 @@ struct omap_dm_timer *omap_dm_timer_request_specific(int id) break; } } + spin_unlock_irqrestore(&dm_timer_lock, flags); if (timer) { ret = omap_dm_timer_prepare(timer); @@ -228,7 +229,6 @@ struct omap_dm_timer *omap_dm_timer_request_specific(int id) timer = NULL; } } - spin_unlock_irqrestore(&dm_timer_lock, flags); if (!timer) pr_debug("%s: timer%d request failed!\n", __func__, id); @@ -258,7 +258,7 @@ EXPORT_SYMBOL_GPL(omap_dm_timer_enable); void omap_dm_timer_disable(struct omap_dm_timer *timer) { - pm_runtime_put(&timer->pdev->dev); + pm_runtime_put_sync(&timer->pdev->dev); } EXPORT_SYMBOL_GPL(omap_dm_timer_disable); diff --git a/arch/arm/plat-omap/include/plat/cpu.h b/arch/arm/plat-omap/include/plat/cpu.h index 68b180edcfff..bb5d08a70dbc 100644 --- a/arch/arm/plat-omap/include/plat/cpu.h +++ b/arch/arm/plat-omap/include/plat/cpu.h @@ -372,7 +372,8 @@ IS_OMAP_TYPE(3430, 0x3430) #define cpu_class_is_omap1() (cpu_is_omap7xx() || cpu_is_omap15xx() || \ cpu_is_omap16xx()) #define cpu_class_is_omap2() (cpu_is_omap24xx() || cpu_is_omap34xx() || \ - cpu_is_omap44xx() || soc_is_omap54xx()) + cpu_is_omap44xx() || soc_is_omap54xx() || \ + soc_is_am33xx()) /* Various silicon revisions for omap2 */ #define OMAP242X_CLASS 0x24200024 diff --git 
a/arch/arm/plat-omap/include/plat/multi.h b/arch/arm/plat-omap/include/plat/multi.h index 045e320f1067..324d31b14852 100644 --- a/arch/arm/plat-omap/include/plat/multi.h +++ b/arch/arm/plat-omap/include/plat/multi.h @@ -108,4 +108,13 @@ # endif #endif +#ifdef CONFIG_SOC_AM33XX +# ifdef OMAP_NAME +# undef MULTI_OMAP2 +# define MULTI_OMAP2 +# else +# define OMAP_NAME am33xx +# endif +#endif + #endif /* __PLAT_OMAP_MULTI_H */ diff --git a/arch/arm/plat-omap/include/plat/uncompress.h b/arch/arm/plat-omap/include/plat/uncompress.h index b8d19a136781..7f7b112acccb 100644 --- a/arch/arm/plat-omap/include/plat/uncompress.h +++ b/arch/arm/plat-omap/include/plat/uncompress.h @@ -110,7 +110,7 @@ static inline void flush(void) _DEBUG_LL_ENTRY(mach, AM33XX_UART##p##_BASE, OMAP_PORT_SHIFT, \ AM33XXUART##p) -static inline void __arch_decomp_setup(unsigned long arch_id) +static inline void arch_decomp_setup(void) { int port = 0; @@ -198,8 +198,6 @@ static inline void __arch_decomp_setup(unsigned long arch_id) } while (0); } -#define arch_decomp_setup() __arch_decomp_setup(arch_id) - /* * nothing to do */ diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index d245a87dc014..b8b747a9d360 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -291,10 +291,12 @@ static struct platform_device orion_ge00 = { void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long irq, - unsigned long irq_err) + unsigned long irq_err, + unsigned int tx_csum_limit) { fill_resources(&orion_ge00_shared, orion_ge00_shared_resources, mapbase + 0x2000, SZ_16K - 1, irq_err); + orion_ge00_shared_data.tx_csum_limit = tx_csum_limit; ge_complete(&orion_ge00_shared_data, orion_ge00_resources, irq, &orion_ge00_shared, eth_data, &orion_ge00); @@ -343,10 +345,12 @@ static struct platform_device orion_ge01 = { void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long 
irq, - unsigned long irq_err) + unsigned long irq_err, + unsigned int tx_csum_limit) { fill_resources(&orion_ge01_shared, orion_ge01_shared_resources, mapbase + 0x2000, SZ_16K - 1, irq_err); + orion_ge01_shared_data.tx_csum_limit = tx_csum_limit; ge_complete(&orion_ge01_shared_data, orion_ge01_resources, irq, &orion_ge01_shared, eth_data, &orion_ge01); diff --git a/arch/arm/plat-orion/include/plat/common.h b/arch/arm/plat-orion/include/plat/common.h index e00fdb213609..ae2377ef63e5 100644 --- a/arch/arm/plat-orion/include/plat/common.h +++ b/arch/arm/plat-orion/include/plat/common.h @@ -39,12 +39,14 @@ void __init orion_rtc_init(unsigned long mapbase, void __init orion_ge00_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long irq, - unsigned long irq_err); + unsigned long irq_err, + unsigned int tx_csum_limit); void __init orion_ge01_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, unsigned long irq, - unsigned long irq_err); + unsigned long irq_err, + unsigned int tx_csum_limit); void __init orion_ge10_init(struct mv643xx_eth_platform_data *eth_data, unsigned long mapbase, diff --git a/arch/arm/plat-s3c24xx/dma.c b/arch/arm/plat-s3c24xx/dma.c index 28f898f75380..db98e7021f0d 100644 --- a/arch/arm/plat-s3c24xx/dma.c +++ b/arch/arm/plat-s3c24xx/dma.c @@ -430,7 +430,7 @@ s3c2410_dma_canload(struct s3c2410_dma_chan *chan) * when necessary. 
*/ -int s3c2410_dma_enqueue(unsigned int channel, void *id, +int s3c2410_dma_enqueue(enum dma_ch channel, void *id, dma_addr_t data, int size) { struct s3c2410_dma_chan *chan = s3c_dma_lookup_channel(channel); diff --git a/arch/arm/plat-samsung/devs.c b/arch/arm/plat-samsung/devs.c index 74e31ce35538..fc49f3dabd76 100644 --- a/arch/arm/plat-samsung/devs.c +++ b/arch/arm/plat-samsung/devs.c @@ -32,6 +32,8 @@ #include <linux/platform_data/s3c-hsudc.h> #include <linux/platform_data/s3c-hsotg.h> +#include <media/s5p_hdmi.h> + #include <asm/irq.h> #include <asm/pmu.h> #include <asm/mach/arch.h> @@ -748,7 +750,8 @@ void __init s5p_i2c_hdmiphy_set_platdata(struct s3c2410_platform_i2c *pd) if (!pd) { pd = &default_i2c_data; - if (soc_is_exynos4210()) + if (soc_is_exynos4210() || + soc_is_exynos4212() || soc_is_exynos4412()) pd->bus_num = 8; else if (soc_is_s5pv210()) pd->bus_num = 3; @@ -759,6 +762,30 @@ void __init s5p_i2c_hdmiphy_set_platdata(struct s3c2410_platform_i2c *pd) npd = s3c_set_platdata(pd, sizeof(struct s3c2410_platform_i2c), &s5p_device_i2c_hdmiphy); } + +struct s5p_hdmi_platform_data s5p_hdmi_def_platdata; + +void __init s5p_hdmi_set_platdata(struct i2c_board_info *hdmiphy_info, + struct i2c_board_info *mhl_info, int mhl_bus) +{ + struct s5p_hdmi_platform_data *pd = &s5p_hdmi_def_platdata; + + if (soc_is_exynos4210() || + soc_is_exynos4212() || soc_is_exynos4412()) + pd->hdmiphy_bus = 8; + else if (soc_is_s5pv210()) + pd->hdmiphy_bus = 3; + else + pd->hdmiphy_bus = 0; + + pd->hdmiphy_info = hdmiphy_info; + pd->mhl_info = mhl_info; + pd->mhl_bus = mhl_bus; + + s3c_set_platdata(pd, sizeof(struct s5p_hdmi_platform_data), + &s5p_device_hdmi); +} + #endif /* CONFIG_S5P_DEV_I2C_HDMIPHY */ /* I2S */ diff --git a/arch/arm/plat-samsung/include/plat/hdmi.h b/arch/arm/plat-samsung/include/plat/hdmi.h new file mode 100644 index 000000000000..331d046ac2c5 --- /dev/null +++ b/arch/arm/plat-samsung/include/plat/hdmi.h @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2012 Samsung 
Electronics Co.Ltd + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __PLAT_SAMSUNG_HDMI_H +#define __PLAT_SAMSUNG_HDMI_H __FILE__ + +extern void s5p_hdmi_set_platdata(struct i2c_board_info *hdmiphy_info, + struct i2c_board_info *mhl_info, int mhl_bus); + +#endif /* __PLAT_SAMSUNG_HDMI_H */ diff --git a/arch/arm/plat-samsung/pm.c b/arch/arm/plat-samsung/pm.c index 64ab65f0fdbc..15070284343e 100644 --- a/arch/arm/plat-samsung/pm.c +++ b/arch/arm/plat-samsung/pm.c @@ -74,7 +74,7 @@ unsigned char pm_uart_udivslot; #ifdef CONFIG_SAMSUNG_PM_DEBUG -struct pm_uart_save uart_save[CONFIG_SERIAL_SAMSUNG_UARTS]; +static struct pm_uart_save uart_save[CONFIG_SERIAL_SAMSUNG_UARTS]; static void s3c_pm_save_uart(unsigned int uart, struct pm_uart_save *save) { diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 331d574df99c..faf65286574e 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -89,6 +89,7 @@ config ATH79 select CEVT_R4K select CSRC_R4K select DMA_NONCOHERENT + select HAVE_CLK select IRQ_CPU select MIPS_MACHINE select SYS_HAS_CPU_MIPS32_R2 diff --git a/arch/mips/alchemy/board-mtx1.c b/arch/mips/alchemy/board-mtx1.c index 99969484c475..a124c251c0c9 100644 --- a/arch/mips/alchemy/board-mtx1.c +++ b/arch/mips/alchemy/board-mtx1.c @@ -228,6 +228,8 @@ static int mtx1_pci_idsel(unsigned int devsel, int assert) * adapter on the mtx-1 "singleboard" variant. It triggers a custom * logic chip connected to EXT_IO3 (GPIO1) to suppress IDSEL signals. 
*/ + udelay(1); + if (assert && devsel != 0) /* Suppress signal to Cardbus */ alchemy_gpio_set_value(1, 0); /* set EXT_IO3 OFF */ diff --git a/arch/mips/ath79/dev-usb.c b/arch/mips/ath79/dev-usb.c index 36e9570e7bc4..b2a2311ec85b 100644 --- a/arch/mips/ath79/dev-usb.c +++ b/arch/mips/ath79/dev-usb.c @@ -145,6 +145,8 @@ static void __init ar7240_usb_setup(void) ath79_ohci_resources[0].start = AR7240_OHCI_BASE; ath79_ohci_resources[0].end = AR7240_OHCI_BASE + AR7240_OHCI_SIZE - 1; + ath79_ohci_resources[1].start = ATH79_CPU_IRQ_USB; + ath79_ohci_resources[1].end = ATH79_CPU_IRQ_USB; platform_device_register(&ath79_ohci_device); } diff --git a/arch/mips/ath79/gpio.c b/arch/mips/ath79/gpio.c index 29054f211832..48fe762d2526 100644 --- a/arch/mips/ath79/gpio.c +++ b/arch/mips/ath79/gpio.c @@ -188,8 +188,10 @@ void __init ath79_gpio_init(void) if (soc_is_ar71xx()) ath79_gpio_count = AR71XX_GPIO_COUNT; - else if (soc_is_ar724x()) - ath79_gpio_count = AR724X_GPIO_COUNT; + else if (soc_is_ar7240()) + ath79_gpio_count = AR7240_GPIO_COUNT; + else if (soc_is_ar7241() || soc_is_ar7242()) + ath79_gpio_count = AR7241_GPIO_COUNT; else if (soc_is_ar913x()) ath79_gpio_count = AR913X_GPIO_COUNT; else if (soc_is_ar933x()) diff --git a/arch/mips/bcm63xx/dev-spi.c b/arch/mips/bcm63xx/dev-spi.c index e39f73048d4f..f1c9c3e2f678 100644 --- a/arch/mips/bcm63xx/dev-spi.c +++ b/arch/mips/bcm63xx/dev-spi.c @@ -106,11 +106,15 @@ int __init bcm63xx_spi_register(void) if (BCMCPU_IS_6338() || BCMCPU_IS_6348()) { spi_resources[0].end += BCM_6338_RSET_SPI_SIZE - 1; spi_pdata.fifo_size = SPI_6338_MSG_DATA_SIZE; + spi_pdata.msg_type_shift = SPI_6338_MSG_TYPE_SHIFT; + spi_pdata.msg_ctl_width = SPI_6338_MSG_CTL_WIDTH; } if (BCMCPU_IS_6358() || BCMCPU_IS_6368()) { spi_resources[0].end += BCM_6358_RSET_SPI_SIZE - 1; spi_pdata.fifo_size = SPI_6358_MSG_DATA_SIZE; + spi_pdata.msg_type_shift = SPI_6358_MSG_TYPE_SHIFT; + spi_pdata.msg_ctl_width = SPI_6358_MSG_CTL_WIDTH; } bcm63xx_spi_regs_init(); diff --git 
a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index 7fb1f222b8a5..274cd4fad30c 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -61,6 +61,12 @@ static void octeon_irq_set_ciu_mapping(int irq, int line, int bit, octeon_irq_ciu_to_irq[line][bit] = irq; } +static void octeon_irq_force_ciu_mapping(struct irq_domain *domain, + int irq, int line, int bit) +{ + irq_domain_associate(domain, irq, line << 6 | bit); +} + static int octeon_coreid_for_cpu(int cpu) { #ifdef CONFIG_SMP @@ -183,19 +189,9 @@ static void __init octeon_irq_init_core(void) mutex_init(&cd->core_irq_mutex); irq = OCTEON_IRQ_SW0 + i; - switch (irq) { - case OCTEON_IRQ_TIMER: - case OCTEON_IRQ_SW0: - case OCTEON_IRQ_SW1: - case OCTEON_IRQ_5: - case OCTEON_IRQ_PERF: - irq_set_chip_data(irq, cd); - irq_set_chip_and_handler(irq, &octeon_irq_chip_core, - handle_percpu_irq); - break; - default: - break; - } + irq_set_chip_data(irq, cd); + irq_set_chip_and_handler(irq, &octeon_irq_chip_core, + handle_percpu_irq); } } @@ -890,7 +886,6 @@ static int octeon_irq_gpio_xlat(struct irq_domain *d, unsigned int type; unsigned int pin; unsigned int trigger; - struct octeon_irq_gpio_domain_data *gpiod; if (d->of_node != node) return -EINVAL; @@ -925,8 +920,7 @@ static int octeon_irq_gpio_xlat(struct irq_domain *d, break; } *out_type = type; - gpiod = d->host_data; - *out_hwirq = gpiod->base_hwirq + pin; + *out_hwirq = pin; return 0; } @@ -996,19 +990,21 @@ static int octeon_irq_ciu_map(struct irq_domain *d, static int octeon_irq_gpio_map(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw) { - unsigned int line = hw >> 6; - unsigned int bit = hw & 63; + struct octeon_irq_gpio_domain_data *gpiod = d->host_data; + unsigned int line, bit; if (!octeon_irq_virq_in_range(virq)) return -EINVAL; + hw += gpiod->base_hwirq; + line = hw >> 6; + bit = hw & 63; if (line > 1 || octeon_irq_ciu_to_irq[line][bit] != 0) return -EINVAL; 
octeon_irq_set_ciu_mapping(virq, line, bit, octeon_irq_gpio_chip, octeon_irq_handle_gpio); - return 0; } @@ -1149,6 +1145,7 @@ static void __init octeon_irq_init_ciu(void) struct irq_chip *chip_wd; struct device_node *gpio_node; struct device_node *ciu_node; + struct irq_domain *ciu_domain = NULL; octeon_irq_init_ciu_percpu(); octeon_irq_setup_secondary = octeon_irq_setup_secondary_ciu; @@ -1177,31 +1174,6 @@ static void __init octeon_irq_init_ciu(void) /* Mips internal */ octeon_irq_init_core(); - /* CIU_0 */ - for (i = 0; i < 16; i++) - octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_WORKQ0, 0, i + 0, chip, handle_level_irq); - - octeon_irq_set_ciu_mapping(OCTEON_IRQ_MBOX0, 0, 32, chip_mbox, handle_percpu_irq); - octeon_irq_set_ciu_mapping(OCTEON_IRQ_MBOX1, 0, 33, chip_mbox, handle_percpu_irq); - - for (i = 0; i < 4; i++) - octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_PCI_INT0, 0, i + 36, chip, handle_level_irq); - for (i = 0; i < 4; i++) - octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_PCI_MSI0, 0, i + 40, chip, handle_level_irq); - - octeon_irq_set_ciu_mapping(OCTEON_IRQ_RML, 0, 46, chip, handle_level_irq); - for (i = 0; i < 4; i++) - octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_TIMER0, 0, i + 52, chip, handle_edge_irq); - - octeon_irq_set_ciu_mapping(OCTEON_IRQ_USB0, 0, 56, chip, handle_level_irq); - octeon_irq_set_ciu_mapping(OCTEON_IRQ_BOOTDMA, 0, 63, chip, handle_level_irq); - - /* CIU_1 */ - for (i = 0; i < 16; i++) - octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_WDOG0, 1, i + 0, chip_wd, handle_level_irq); - - octeon_irq_set_ciu_mapping(OCTEON_IRQ_USB1, 1, 17, chip, handle_level_irq); - gpio_node = of_find_compatible_node(NULL, NULL, "cavium,octeon-3860-gpio"); if (gpio_node) { struct octeon_irq_gpio_domain_data *gpiod; @@ -1219,10 +1191,35 @@ static void __init octeon_irq_init_ciu(void) ciu_node = of_find_compatible_node(NULL, NULL, "cavium,octeon-3860-ciu"); if (ciu_node) { - irq_domain_add_tree(ciu_node, &octeon_irq_domain_ciu_ops, NULL); + ciu_domain = 
irq_domain_add_tree(ciu_node, &octeon_irq_domain_ciu_ops, NULL); of_node_put(ciu_node); } else - pr_warn("Cannot find device node for cavium,octeon-3860-ciu.\n"); + panic("Cannot find device node for cavium,octeon-3860-ciu."); + + /* CIU_0 */ + for (i = 0; i < 16; i++) + octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_WORKQ0, 0, i + 0); + + octeon_irq_set_ciu_mapping(OCTEON_IRQ_MBOX0, 0, 32, chip_mbox, handle_percpu_irq); + octeon_irq_set_ciu_mapping(OCTEON_IRQ_MBOX1, 0, 33, chip_mbox, handle_percpu_irq); + + for (i = 0; i < 4; i++) + octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_PCI_INT0, 0, i + 36); + for (i = 0; i < 4; i++) + octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_PCI_MSI0, 0, i + 40); + + octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_RML, 0, 46); + for (i = 0; i < 4; i++) + octeon_irq_force_ciu_mapping(ciu_domain, i + OCTEON_IRQ_TIMER0, 0, i + 52); + + octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB0, 0, 56); + octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_BOOTDMA, 0, 63); + + /* CIU_1 */ + for (i = 0; i < 16; i++) + octeon_irq_set_ciu_mapping(i + OCTEON_IRQ_WDOG0, 1, i + 0, chip_wd, handle_level_irq); + + octeon_irq_force_ciu_mapping(ciu_domain, OCTEON_IRQ_USB1, 1, 17); /* Enable the CIU lines */ set_c0_status(STATUSF_IP3 | STATUSF_IP2); diff --git a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h index 1caa78ad06d5..dde504477fac 100644 --- a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h +++ b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h @@ -393,7 +393,8 @@ #define AR71XX_GPIO_REG_FUNC 0x28 #define AR71XX_GPIO_COUNT 16 -#define AR724X_GPIO_COUNT 18 +#define AR7240_GPIO_COUNT 18 +#define AR7241_GPIO_COUNT 20 #define AR913X_GPIO_COUNT 22 #define AR933X_GPIO_COUNT 30 #define AR934X_GPIO_COUNT 23 diff --git a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h index 4476fa03bf36..6ddae926bf79 
100644 --- a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h @@ -42,7 +42,6 @@ #define cpu_has_mips64r1 0 #define cpu_has_mips64r2 0 -#define cpu_has_dsp 0 #define cpu_has_mipsmt 0 #define cpu_has_64bits 0 diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_spi.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_spi.h index 7d98dbe5d4b5..c9bae1362606 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_spi.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_spi.h @@ -9,6 +9,8 @@ int __init bcm63xx_spi_register(void); struct bcm63xx_spi_pdata { unsigned int fifo_size; + unsigned int msg_type_shift; + unsigned int msg_ctl_width; int bus_num; int num_chipselect; u32 speed_hz; diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h index 4ccc2a748aff..61f2a2a5099d 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h @@ -1054,7 +1054,8 @@ #define SPI_6338_FILL_BYTE 0x07 #define SPI_6338_MSG_TAIL 0x09 #define SPI_6338_RX_TAIL 0x0b -#define SPI_6338_MSG_CTL 0x40 +#define SPI_6338_MSG_CTL 0x40 /* 8-bits register */ +#define SPI_6338_MSG_CTL_WIDTH 8 #define SPI_6338_MSG_DATA 0x41 #define SPI_6338_MSG_DATA_SIZE 0x3f #define SPI_6338_RX_DATA 0x80 @@ -1070,7 +1071,8 @@ #define SPI_6348_FILL_BYTE 0x07 #define SPI_6348_MSG_TAIL 0x09 #define SPI_6348_RX_TAIL 0x0b -#define SPI_6348_MSG_CTL 0x40 +#define SPI_6348_MSG_CTL 0x40 /* 8-bits register */ +#define SPI_6348_MSG_CTL_WIDTH 8 #define SPI_6348_MSG_DATA 0x41 #define SPI_6348_MSG_DATA_SIZE 0x3f #define SPI_6348_RX_DATA 0x80 @@ -1078,6 +1080,7 @@ /* BCM 6358 SPI core */ #define SPI_6358_MSG_CTL 0x00 /* 16-bits register */ +#define SPI_6358_MSG_CTL_WIDTH 16 #define SPI_6358_MSG_DATA 0x02 #define SPI_6358_MSG_DATA_SIZE 0x21e #define SPI_6358_RX_DATA 0x400 @@ -1094,6 +1097,7 @@ /* BCM 6358 SPI core */ #define 
SPI_6368_MSG_CTL 0x00 /* 16-bits register */ +#define SPI_6368_MSG_CTL_WIDTH 16 #define SPI_6368_MSG_DATA 0x02 #define SPI_6368_MSG_DATA_SIZE 0x21e #define SPI_6368_RX_DATA 0x400 @@ -1115,7 +1119,10 @@ #define SPI_HD_W 0x01 #define SPI_HD_R 0x02 #define SPI_BYTE_CNT_SHIFT 0 -#define SPI_MSG_TYPE_SHIFT 14 +#define SPI_6338_MSG_TYPE_SHIFT 6 +#define SPI_6348_MSG_TYPE_SHIFT 6 +#define SPI_6358_MSG_TYPE_SHIFT 14 +#define SPI_6368_MSG_TYPE_SHIFT 14 /* Command */ #define SPI_CMD_NOOP 0x00 diff --git a/arch/mips/include/asm/mach-cavium-octeon/irq.h b/arch/mips/include/asm/mach-cavium-octeon/irq.h index 418992042f6f..c22a3078bf11 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/irq.h +++ b/arch/mips/include/asm/mach-cavium-octeon/irq.h @@ -21,14 +21,10 @@ enum octeon_irq { OCTEON_IRQ_TIMER, /* sources in CIU_INTX_EN0 */ OCTEON_IRQ_WORKQ0, - OCTEON_IRQ_GPIO0 = OCTEON_IRQ_WORKQ0 + 16, - OCTEON_IRQ_WDOG0 = OCTEON_IRQ_GPIO0 + 16, + OCTEON_IRQ_WDOG0 = OCTEON_IRQ_WORKQ0 + 16, OCTEON_IRQ_WDOG15 = OCTEON_IRQ_WDOG0 + 15, OCTEON_IRQ_MBOX0 = OCTEON_IRQ_WDOG0 + 16, OCTEON_IRQ_MBOX1, - OCTEON_IRQ_UART0, - OCTEON_IRQ_UART1, - OCTEON_IRQ_UART2, OCTEON_IRQ_PCI_INT0, OCTEON_IRQ_PCI_INT1, OCTEON_IRQ_PCI_INT2, @@ -38,8 +34,6 @@ enum octeon_irq { OCTEON_IRQ_PCI_MSI2, OCTEON_IRQ_PCI_MSI3, - OCTEON_IRQ_TWSI, - OCTEON_IRQ_TWSI2, OCTEON_IRQ_RML, OCTEON_IRQ_TIMER0, OCTEON_IRQ_TIMER1, @@ -47,8 +41,6 @@ enum octeon_irq { OCTEON_IRQ_TIMER3, OCTEON_IRQ_USB0, OCTEON_IRQ_USB1, - OCTEON_IRQ_MII0, - OCTEON_IRQ_MII1, OCTEON_IRQ_BOOTDMA, #ifndef CONFIG_PCI_MSI OCTEON_IRQ_LAST = 127 diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h index 7531ecd654d6..dca8bce8c7ab 100644 --- a/arch/mips/include/asm/module.h +++ b/arch/mips/include/asm/module.h @@ -10,6 +10,7 @@ struct mod_arch_specific { struct list_head dbe_list; const struct exception_table_entry *dbe_start; const struct exception_table_entry *dbe_end; + struct mips_hi16 *r_mips_hi16_list; }; typedef uint8_t Elf64_Byte; /* 
Type for a 8-bit quantity. */ diff --git a/arch/mips/include/asm/r4k-timer.h b/arch/mips/include/asm/r4k-timer.h index a37d12b3b61c..afe9e0e03fe9 100644 --- a/arch/mips/include/asm/r4k-timer.h +++ b/arch/mips/include/asm/r4k-timer.h @@ -12,16 +12,16 @@ #ifdef CONFIG_SYNC_R4K -extern void synchronise_count_master(void); -extern void synchronise_count_slave(void); +extern void synchronise_count_master(int cpu); +extern void synchronise_count_slave(int cpu); #else -static inline void synchronise_count_master(void) +static inline void synchronise_count_master(int cpu) { } -static inline void synchronise_count_slave(void) +static inline void synchronise_count_slave(int cpu) { } diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c index a5066b1c3de3..4f8c3cba8c0c 100644 --- a/arch/mips/kernel/module.c +++ b/arch/mips/kernel/module.c @@ -39,8 +39,6 @@ struct mips_hi16 { Elf_Addr value; }; -static struct mips_hi16 *mips_hi16_list; - static LIST_HEAD(dbe_list); static DEFINE_SPINLOCK(dbe_lock); @@ -128,8 +126,8 @@ static int apply_r_mips_hi16_rel(struct module *me, u32 *location, Elf_Addr v) n->addr = (Elf_Addr *)location; n->value = v; - n->next = mips_hi16_list; - mips_hi16_list = n; + n->next = me->arch.r_mips_hi16_list; + me->arch.r_mips_hi16_list = n; return 0; } @@ -142,18 +140,28 @@ static int apply_r_mips_hi16_rela(struct module *me, u32 *location, Elf_Addr v) return 0; } +static void free_relocation_chain(struct mips_hi16 *l) +{ + struct mips_hi16 *next; + + while (l) { + next = l->next; + kfree(l); + l = next; + } +} + static int apply_r_mips_lo16_rel(struct module *me, u32 *location, Elf_Addr v) { unsigned long insnlo = *location; + struct mips_hi16 *l; Elf_Addr val, vallo; /* Sign extend the addend we extract from the lo insn. 
*/ vallo = ((insnlo & 0xffff) ^ 0x8000) - 0x8000; - if (mips_hi16_list != NULL) { - struct mips_hi16 *l; - - l = mips_hi16_list; + if (me->arch.r_mips_hi16_list != NULL) { + l = me->arch.r_mips_hi16_list; while (l != NULL) { struct mips_hi16 *next; unsigned long insn; @@ -188,7 +196,7 @@ static int apply_r_mips_lo16_rel(struct module *me, u32 *location, Elf_Addr v) l = next; } - mips_hi16_list = NULL; + me->arch.r_mips_hi16_list = NULL; } /* @@ -201,6 +209,9 @@ static int apply_r_mips_lo16_rel(struct module *me, u32 *location, Elf_Addr v) return 0; out_danger: + free_relocation_chain(l); + me->arch.r_mips_hi16_list = NULL; + pr_err("module %s: dangerous R_MIPS_LO16 REL relocation\n", me->name); return -ENOEXEC; @@ -273,6 +284,7 @@ int apply_relocate(Elf_Shdr *sechdrs, const char *strtab, pr_debug("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); + me->arch.r_mips_hi16_list = NULL; for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -296,6 +308,19 @@ int apply_relocate(Elf_Shdr *sechdrs, const char *strtab, return res; } + /* + * Normally the hi16 list should be deallocated at this point. A + * malformed binary however could contain a series of R_MIPS_HI16 + * relocations not followed by a R_MIPS_LO16 relocation. In that + * case, free up the list and return an error. 
+ */ + if (me->arch.r_mips_hi16_list) { + free_relocation_chain(me->arch.r_mips_hi16_list); + me->arch.r_mips_hi16_list = NULL; + + return -ENOEXEC; + } + return 0; } diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 31637d8c8738..9005bf9fb859 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -130,7 +130,7 @@ asmlinkage __cpuinit void start_secondary(void) cpu_set(cpu, cpu_callin_map); - synchronise_count_slave(); + synchronise_count_slave(cpu); /* * irq will be enabled in ->smp_finish(), enabling it too early @@ -173,7 +173,6 @@ void smp_send_stop(void) void __init smp_cpus_done(unsigned int max_cpus) { mp_ops->cpus_done(); - synchronise_count_master(); } /* called from main before smp_init() */ @@ -206,6 +205,7 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle) while (!cpu_isset(cpu, cpu_callin_map)) udelay(100); + synchronise_count_master(cpu); return 0; } diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c index 842d55e411fd..7f1eca3858de 100644 --- a/arch/mips/kernel/sync-r4k.c +++ b/arch/mips/kernel/sync-r4k.c @@ -28,12 +28,11 @@ static atomic_t __cpuinitdata count_reference = ATOMIC_INIT(0); #define COUNTON 100 #define NR_LOOPS 5 -void __cpuinit synchronise_count_master(void) +void __cpuinit synchronise_count_master(int cpu) { int i; unsigned long flags; unsigned int initcount; - int nslaves; #ifdef CONFIG_MIPS_MT_SMTC /* @@ -43,8 +42,7 @@ void __cpuinit synchronise_count_master(void) return; #endif - printk(KERN_INFO "Synchronize counters across %u CPUs: ", - num_online_cpus()); + printk(KERN_INFO "Synchronize counters for CPU %u: ", cpu); local_irq_save(flags); @@ -52,7 +50,7 @@ void __cpuinit synchronise_count_master(void) * Notify the slaves that it's time to start */ atomic_set(&count_reference, read_c0_count()); - atomic_set(&count_start_flag, 1); + atomic_set(&count_start_flag, cpu); smp_wmb(); /* Count will be initialised to current timer for all CPU's */ @@ -69,10 +67,9 @@ 
void __cpuinit synchronise_count_master(void) * two CPUs. */ - nslaves = num_online_cpus()-1; for (i = 0; i < NR_LOOPS; i++) { - /* slaves loop on '!= ncpus' */ - while (atomic_read(&count_count_start) != nslaves) + /* slaves loop on '!= 2' */ + while (atomic_read(&count_count_start) != 1) mb(); atomic_set(&count_count_stop, 0); smp_wmb(); @@ -89,7 +86,7 @@ void __cpuinit synchronise_count_master(void) /* * Wait for all slaves to leave the synchronization point: */ - while (atomic_read(&count_count_stop) != nslaves) + while (atomic_read(&count_count_stop) != 1) mb(); atomic_set(&count_count_start, 0); smp_wmb(); @@ -97,6 +94,7 @@ void __cpuinit synchronise_count_master(void) } /* Arrange for an interrupt in a short while */ write_c0_compare(read_c0_count() + COUNTON); + atomic_set(&count_start_flag, 0); local_irq_restore(flags); @@ -108,11 +106,10 @@ void __cpuinit synchronise_count_master(void) printk("done.\n"); } -void __cpuinit synchronise_count_slave(void) +void __cpuinit synchronise_count_slave(int cpu) { int i; unsigned int initcount; - int ncpus; #ifdef CONFIG_MIPS_MT_SMTC /* @@ -127,16 +124,15 @@ void __cpuinit synchronise_count_slave(void) * so we first wait for the master to say everyone is ready */ - while (!atomic_read(&count_start_flag)) + while (atomic_read(&count_start_flag) != cpu) mb(); /* Count will be initialised to next expire for all CPU's */ initcount = atomic_read(&count_reference); - ncpus = num_online_cpus(); for (i = 0; i < NR_LOOPS; i++) { atomic_inc(&count_count_start); - while (atomic_read(&count_count_start) != ncpus) + while (atomic_read(&count_count_start) != 2) mb(); /* @@ -146,7 +142,7 @@ void __cpuinit synchronise_count_slave(void) write_c0_count(initcount); atomic_inc(&count_count_stop); - while (atomic_read(&count_count_stop) != ncpus) + while (atomic_read(&count_count_stop) != 2) mb(); } /* Arrange for an interrupt in a short while */ diff --git a/arch/mips/mti-malta/malta-pci.c b/arch/mips/mti-malta/malta-pci.c index 
284dea54faf5..2147cb34e705 100644 --- a/arch/mips/mti-malta/malta-pci.c +++ b/arch/mips/mti-malta/malta-pci.c @@ -252,16 +252,3 @@ void __init mips_pcibios_init(void) register_pci_controller(controller); } - -/* Enable PCI 2.1 compatibility in PIIX4 */ -static void __devinit quirk_dlcsetup(struct pci_dev *dev) -{ - u8 odlc, ndlc; - (void) pci_read_config_byte(dev, 0x82, &odlc); - /* Enable passive releases and delayed transaction */ - ndlc = odlc | 7; - (void) pci_write_config_byte(dev, 0x82, ndlc); -} - -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0, - quirk_dlcsetup); diff --git a/arch/mips/pci/pci-ar724x.c b/arch/mips/pci/pci-ar724x.c index 414a7459858d..86d77a666458 100644 --- a/arch/mips/pci/pci-ar724x.c +++ b/arch/mips/pci/pci-ar724x.c @@ -23,9 +23,12 @@ #define AR724X_PCI_MEM_BASE 0x10000000 #define AR724X_PCI_MEM_SIZE 0x08000000 +#define AR724X_PCI_REG_RESET 0x18 #define AR724X_PCI_REG_INT_STATUS 0x4c #define AR724X_PCI_REG_INT_MASK 0x50 +#define AR724X_PCI_RESET_LINK_UP BIT(0) + #define AR724X_PCI_INT_DEV0 BIT(14) #define AR724X_PCI_IRQ_COUNT 1 @@ -38,6 +41,15 @@ static void __iomem *ar724x_pci_ctrl_base; static u32 ar724x_pci_bar0_value; static bool ar724x_pci_bar0_is_cached; +static bool ar724x_pci_link_up; + +static inline bool ar724x_pci_check_link(void) +{ + u32 reset; + + reset = __raw_readl(ar724x_pci_ctrl_base + AR724X_PCI_REG_RESET); + return reset & AR724X_PCI_RESET_LINK_UP; +} static int ar724x_pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, uint32_t *value) @@ -46,6 +58,9 @@ static int ar724x_pci_read(struct pci_bus *bus, unsigned int devfn, int where, void __iomem *base; u32 data; + if (!ar724x_pci_link_up) + return PCIBIOS_DEVICE_NOT_FOUND; + if (devfn) return PCIBIOS_DEVICE_NOT_FOUND; @@ -96,6 +111,9 @@ static int ar724x_pci_write(struct pci_bus *bus, unsigned int devfn, int where, u32 data; int s; + if (!ar724x_pci_link_up) + return PCIBIOS_DEVICE_NOT_FOUND; + if (devfn) return 
PCIBIOS_DEVICE_NOT_FOUND; @@ -280,6 +298,10 @@ int __init ar724x_pcibios_init(int irq) if (ar724x_pci_ctrl_base == NULL) goto err_unmap_devcfg; + ar724x_pci_link_up = ar724x_pci_check_link(); + if (!ar724x_pci_link_up) + pr_warn("ar724x: PCIe link is down\n"); + ar724x_pci_irq_init(irq); register_pci_controller(&ar724x_pci_controller); diff --git a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi index 8d35d2c1f694..4f9c9f682ecf 100644 --- a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi @@ -345,6 +345,13 @@ /include/ "qoriq-duart-1.dtsi" /include/ "qoriq-gpio-0.dtsi" /include/ "qoriq-usb2-mph-0.dtsi" + usb@210000 { + compatible = "fsl-usb2-mph-v1.6", "fsl,mpc85xx-usb2-mph", "fsl-usb2-mph"; + port0; + }; /include/ "qoriq-usb2-dr-0.dtsi" + usb@211000 { + compatible = "fsl-usb2-dr-v1.6", "fsl,mpc85xx-usb2-dr", "fsl-usb2-dr"; + }; /include/ "qoriq-sec4.0-0.dtsi" }; diff --git a/arch/powerpc/configs/85xx/p1023rds_defconfig b/arch/powerpc/configs/85xx/p1023rds_defconfig index f4337bacd0e7..26e541c4662b 100644 --- a/arch/powerpc/configs/85xx/p1023rds_defconfig +++ b/arch/powerpc/configs/85xx/p1023rds_defconfig @@ -6,28 +6,27 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_AUDIT=y -CONFIG_SPARSE_IRQ=y +CONFIG_IRQ_DOMAIN_DEBUG=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_EMBEDDED=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_MAC_PARTITION=y CONFIG_P1023_RDS=y CONFIG_QUICC_ENGINE=y CONFIG_QE_GPIO=y CONFIG_CPM2=y -CONFIG_GPIO_MPC8XXX=y CONFIG_HIGHMEM=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m 
CONFIG_MATH_EMULATION=y @@ -63,11 +62,11 @@ CONFIG_INET_ESP=y CONFIG_IPV6=y CONFIG_IP_SCTP=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 -CONFIG_MISC_DEVICES=y CONFIG_EEPROM_LEGACY=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y @@ -80,15 +79,14 @@ CONFIG_SATA_FSL=y CONFIG_SATA_SIL24=y CONFIG_NETDEVICES=y CONFIG_DUMMY=y +CONFIG_FS_ENET=y +CONFIG_FSL_PQ_MDIO=y +CONFIG_E1000E=y CONFIG_MARVELL_PHY=y CONFIG_DAVICOM_PHY=y CONFIG_CICADA_PHY=y CONFIG_VITESSE_PHY=y CONFIG_FIXED_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_FS_ENET=y -CONFIG_E1000E=y -CONFIG_FSL_PQ_MDIO=y CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set @@ -98,16 +96,15 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=2 CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y CONFIG_SERIAL_QE=m -CONFIG_HW_RANDOM=y CONFIG_NVRAM=y CONFIG_I2C=y CONFIG_I2C_CPM=m CONFIG_I2C_MPC=y +CONFIG_GPIO_MPC8XXX=y # CONFIG_HWMON is not set CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_SOUND=y @@ -123,7 +120,6 @@ CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y # CONFIG_NET_DMA is not set CONFIG_STAGING=y -# CONFIG_STAGING_EXCLUDE_BUILD is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set @@ -150,22 +146,15 @@ CONFIG_QNX4FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y CONFIG_CRC_T10DIF=y CONFIG_FRAME_WARN=8092 CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y diff --git 
a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index cbb98c1234fd..8b3d57c1ebe8 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -6,8 +6,8 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_AUDIT=y -CONFIG_SPARSE_IRQ=y -CONFIG_RCU_TRACE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 @@ -21,23 +21,22 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_MAC_PARTITION=y CONFIG_P2041_RDB=y CONFIG_P3041_DS=y CONFIG_P4080_DS=y CONFIG_P5020_DS=y CONFIG_HIGHMEM=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y CONFIG_FORCE_MAX_ZONEORDER=13 -CONFIG_FSL_LBC=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y -CONFIG_PCI_MSI=y # CONFIG_PCIEASPM is not set +CONFIG_PCI_MSI=y CONFIG_RAPIDIO=y CONFIG_FSL_RIO=y CONFIG_NET=y @@ -70,6 +69,7 @@ CONFIG_INET_IPCOMP=y CONFIG_IPV6=y CONFIG_IP_SCTP=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_CHAR=y @@ -77,17 +77,14 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y +CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_ECC=y -CONFIG_MTD_NAND_IDS=y -CONFIG_MTD_NAND_FSL_IFC=y CONFIG_MTD_NAND_FSL_ELBC=y -CONFIG_MTD_M25P80=y +CONFIG_MTD_NAND_FSL_IFC=y CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 -CONFIG_MISC_DEVICES=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y @@ -115,11 +112,9 @@ CONFIG_SERIO_LIBPS2=y CONFIG_PPC_EPAPR_HV_BYTECHAN=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y 
-CONFIG_HW_RANDOM=y CONFIG_NVRAM=y CONFIG_I2C=y CONFIG_I2C_CHARDEV=y @@ -132,7 +127,6 @@ CONFIG_SPI_FSL_ESPI=y CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_USB_HID=m CONFIG_USB=y -CONFIG_USB_DEVICEFS=y CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_FSL=y @@ -142,8 +136,6 @@ CONFIG_USB_OHCI_HCD_PPC_OF_LE=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y -CONFIG_MMC_SDHCI_OF=y -CONFIG_MMC_SDHCI_OF_ESDHC=y CONFIG_EDAC=y CONFIG_EDAC_MM_EDAC=y CONFIG_EDAC_MPC85XX=y @@ -170,19 +162,16 @@ CONFIG_HUGETLBFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=m CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_INFO=y -CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_RCU_TRACE=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index dd89de8b0b7f..0516e22ca3de 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -56,6 +56,7 @@ CONFIG_INET_ESP=y CONFIG_IPV6=y CONFIG_IP_SCTP=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_CHAR=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 15130066e5e2..07b7f2af2dca 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -1,8 +1,10 @@ +CONFIG_PPC64=y +CONFIG_ALTIVEC=y +CONFIG_SMP=y +CONFIG_NR_CPUS=4 CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_BLK_DEV_INITRD=y @@ -13,15 +15,16 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y -CONFIG_SMP=y 
-CONFIG_NR_CPUS=4 -CONFIG_KEXEC=y -# CONFIG_RELOCATABLE is not set +# CONFIG_PPC_PSERIES is not set CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_PMAC64=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_KEXEC=y +CONFIG_IRQ_ALL_CPUS=y +# CONFIG_MIGRATION is not set CONFIG_PCI_MSI=y CONFIG_NET=y CONFIG_PACKET=y @@ -49,6 +52,7 @@ CONFIG_NF_CT_NETLINK=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_QUEUE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y @@ -56,6 +60,8 @@ CONFIG_BLK_DEV_RAM_SIZE=65536 CONFIG_CDROM_PKTCDVD=m CONFIG_IDE=y CONFIG_BLK_DEV_IDECD=y +CONFIG_BLK_DEV_IDE_PMAC=y +CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y @@ -79,24 +85,33 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m -CONFIG_MACINTOSH_DRIVERS=y +CONFIG_IEEE1394=y +CONFIG_IEEE1394_OHCI1394=y +CONFIG_IEEE1394_SBP2=m +CONFIG_IEEE1394_ETH1394=m +CONFIG_IEEE1394_RAWIO=y +CONFIG_IEEE1394_VIDEO1394=m +CONFIG_IEEE1394_DV1394=m +CONFIG_ADB_PMU=y +CONFIG_PMAC_SMU=y CONFIG_MAC_EMUMOUSEBTN=y +CONFIG_THERM_PM72=y +CONFIG_WINDFARM=y +CONFIG_WINDFARM_PM81=y +CONFIG_WINDFARM_PM91=y +CONFIG_WINDFARM_PM112=y +CONFIG_WINDFARM_PM121=y CONFIG_NETDEVICES=y -CONFIG_BONDING=m CONFIG_DUMMY=m -CONFIG_MII=y +CONFIG_BONDING=m CONFIG_TUN=m +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +CONFIG_SUNGEM=y CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y -CONFIG_TIGON3=y CONFIG_E1000=y -CONFIG_SUNGEM=y -CONFIG_PPP=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPPOE=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m +CONFIG_TIGON3=y CONFIG_USB_CATC=m CONFIG_USB_KAWETH=m CONFIG_USB_PEGASUS=m @@ -106,24 +121,36 @@ CONFIG_USB_USBNET=m # CONFIG_USB_NET_NET1080 is not set # CONFIG_USB_NET_CDC_SUBSET is not set # CONFIG_USB_NET_ZAURUS is not set +CONFIG_PPP=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_PPP_DEFLATE=m 
+CONFIG_PPP_BSDCOMP=m +CONFIG_PPPOE=m # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_JOYDEV=m CONFIG_INPUT_EVDEV=y +# CONFIG_KEYBOARD_ATKBD is not set # CONFIG_MOUSE_PS2 is not set +# CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set -CONFIG_VT_HW_CONSOLE_BINDING=y # CONFIG_HW_RANDOM is not set CONFIG_GEN_RTC=y CONFIG_RAW_DRIVER=y CONFIG_I2C_CHARDEV=y # CONFIG_HWMON is not set -CONFIG_AGP=y -CONFIG_DRM=y -CONFIG_DRM_NOUVEAU=y +CONFIG_AGP=m +CONFIG_AGP_UNINORTH=m CONFIG_VIDEO_OUTPUT_CONTROL=m +CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_TILEBLITTING=y +CONFIG_FB_OF=y +CONFIG_FB_NVIDIA=y +CONFIG_FB_NVIDIA_I2C=y CONFIG_FB_RADEON=y +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_SND=m @@ -131,7 +158,15 @@ CONFIG_SND_SEQUENCER=m CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_POWERMAC=m +CONFIG_SND_AOA=m +CONFIG_SND_AOA_FABRIC_LAYOUT=m +CONFIG_SND_AOA_ONYX=m +CONFIG_SND_AOA_TAS=m +CONFIG_SND_AOA_TOONIE=m CONFIG_SND_USB_AUDIO=m +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_PANTHERLORD=y @@ -139,12 +174,13 @@ CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_USB=y +CONFIG_USB_DEVICEFS=y CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y +# CONFIG_USB_EHCI_HCD_PPC_OF is not set CONFIG_USB_OHCI_HCD=y +CONFIG_USB_OHCI_HCD_PPC_OF_BE=y CONFIG_USB_ACM=m CONFIG_USB_PRINTER=y CONFIG_USB_STORAGE=y @@ -208,6 +244,8 @@ CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_POSIX_ACL=y +CONFIG_INOTIFY=y +CONFIG_AUTOFS_FS=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y @@ -221,12 +259,14 @@ CONFIG_HFS_FS=m CONFIG_HFSPLUS_FS=m CONFIG_CRAMFS=y CONFIG_NFS_FS=y +CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NFSD=y CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_CIFS=m +CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y 
CONFIG_NLS_CODEPAGE_1250=y CONFIG_NLS_CODEPAGE_1251=y @@ -234,23 +274,29 @@ CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_15=y CONFIG_NLS_UTF8=y +CONFIG_CRC_T10DIF=y +CONFIG_LIBCRC32C=m CONFIG_MAGIC_SYSRQ=y -# CONFIG_UNUSED_SYMBOLS is not set CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y +# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_LATENCYTOP=y -CONFIG_STRICT_DEVMEM=y +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_BOOTX_TEXT=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m +CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_ANUBIS=m +CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m @@ -260,6 +306,3 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set -# CONFIG_VIRTUALIZATION is not set -CONFIG_CRC_T10DIF=y -CONFIG_LIBCRC32C=m diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig index 5aac9a8bc53b..9352e4430c3b 100644 --- a/arch/powerpc/configs/mpc83xx_defconfig +++ b/arch/powerpc/configs/mpc83xx_defconfig @@ -2,12 +2,12 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y @@ -25,7 +25,6 @@ CONFIG_ASP834x=y CONFIG_QUICC_ENGINE=y CONFIG_QE_GPIO=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -42,10 +41,9 @@ CONFIG_INET_ESP=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y 
-CONFIG_MTD_OF_PARTS=y CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y @@ -64,15 +62,14 @@ CONFIG_ATA=y CONFIG_SATA_FSL=y CONFIG_SATA_SIL=y CONFIG_NETDEVICES=y +CONFIG_MII=y +CONFIG_UCC_GETH=y +CONFIG_GIANFAR=y CONFIG_MARVELL_PHY=y CONFIG_DAVICOM_PHY=y CONFIG_VITESSE_PHY=y CONFIG_ICPLUS_PHY=y CONFIG_FIXED_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -CONFIG_GIANFAR=y -CONFIG_UCC_GETH=y CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set @@ -112,17 +109,12 @@ CONFIG_RTC_DRV_DS1374=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_CRC_T10DIF=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA256=y diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index 03ee911c4577..8b5bda27d248 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -5,7 +5,9 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_AUDIT=y -CONFIG_SPARSE_IRQ=y +CONFIG_IRQ_DOMAIN_DEBUG=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 @@ -17,6 +19,8 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_MAC_PARTITION=y CONFIG_MPC8540_ADS=y CONFIG_MPC8560_ADS=y CONFIG_MPC85xx_CDS=y @@ -40,8 +44,6 @@ CONFIG_SBC8548=y CONFIG_QUICC_ENGINE=y CONFIG_QE_GPIO=y CONFIG_HIGHMEM=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=m CONFIG_MATH_EMULATION=y CONFIG_FORCE_MAX_ZONEORDER=12 @@ -74,36 +76,25 @@ CONFIG_INET_ESP=y CONFIG_IPV6=y CONFIG_IP_SCTP=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y CONFIG_MTD=y 
CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y CONFIG_FTL=y -CONFIG_MTD_GEN_PROBE=y -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y +CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_CFI_UTIL=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_OF_PARTS=y +CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y -CONFIG_MTD_NAND_IDS=y -CONFIG_MTD_NAND_ECC=y -CONFIG_MTD_M25P80=y CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 -CONFIG_MISC_DEVICES=y CONFIG_EEPROM_LEGACY=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y @@ -115,6 +106,7 @@ CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_FSL=y CONFIG_PATA_ALI=y +CONFIG_PATA_VIA=y CONFIG_NETDEVICES=y CONFIG_DUMMY=y CONFIG_FS_ENET=y @@ -134,7 +126,6 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=2 CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y @@ -183,7 +174,6 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y -CONFIG_USB_DEVICEFS=y CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_FSL=y @@ -229,18 +219,13 @@ CONFIG_QNX4FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y CONFIG_CRC_T10DIF=y CONFIG_DEBUG_FS=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_INFO=y -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index fdfa84dc908f..b0974e7e98ae 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ 
b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -7,7 +7,9 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_AUDIT=y -CONFIG_SPARSE_IRQ=y +CONFIG_IRQ_DOMAIN_DEBUG=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 @@ -19,6 +21,8 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_MAC_PARTITION=y CONFIG_MPC8540_ADS=y CONFIG_MPC8560_ADS=y CONFIG_MPC85xx_CDS=y @@ -42,8 +46,6 @@ CONFIG_SBC8548=y CONFIG_QUICC_ENGINE=y CONFIG_QE_GPIO=y CONFIG_HIGHMEM=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=m CONFIG_MATH_EMULATION=y CONFIG_IRQ_ALL_CPUS=y @@ -77,36 +79,25 @@ CONFIG_INET_ESP=y CONFIG_IPV6=y CONFIG_IP_SCTP=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y CONFIG_FTL=y -CONFIG_MTD_GEN_PROBE=y -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y +CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_CFI_UTIL=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_PARTITIONS=y -CONFIG_MTD_OF_PARTS=y +CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y -CONFIG_MTD_NAND_IDS=y -CONFIG_MTD_NAND_ECC=y -CONFIG_MTD_M25P80=y CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 -CONFIG_MISC_DEVICES=y CONFIG_EEPROM_LEGACY=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y @@ -137,7 +128,6 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=2 CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y @@ -186,7 +176,6 @@ CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y 
CONFIG_USB=y -CONFIG_USB_DEVICEFS=y CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_FSL=y @@ -232,18 +221,13 @@ CONFIG_QNX4FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y CONFIG_CRC_T10DIF=y CONFIG_DEBUG_FS=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_INFO=y -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 50d82c8a037f..b3c083de17ad 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -553,9 +553,7 @@ static inline int cpu_has_feature(unsigned long feature) & feature); } -#ifdef CONFIG_HAVE_HW_BREAKPOINT #define HBP_NUM 1 -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 50ea12fd7bf5..a8bf5c673a3c 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -33,6 +33,7 @@ #include <asm/kvm_asm.h> #include <asm/processor.h> #include <asm/page.h> +#include <asm/cacheflush.h> #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 0124937a23b9..e006f0bdea95 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -219,4 +219,16 @@ void kvmppc_claim_lpid(long lpid); void kvmppc_free_lpid(long lpid); void kvmppc_init_lpid(unsigned long nr_lpids); +static inline void kvmppc_mmu_flush_icache(pfn_t pfn) +{ + /* Clear i-cache for new pages */ + struct page *page; + page = pfn_to_page(pfn); + if (!test_bit(PG_arch_1, &page->flags)) { + flush_dcache_icache_page(page); + set_bit(PG_arch_1, &page->flags); + } +} + + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git 
a/arch/powerpc/include/asm/mpic_msgr.h b/arch/powerpc/include/asm/mpic_msgr.h index 326d33ca55cd..d4f471fb1031 100644 --- a/arch/powerpc/include/asm/mpic_msgr.h +++ b/arch/powerpc/include/asm/mpic_msgr.h @@ -14,6 +14,7 @@ #include <linux/types.h> #include <linux/spinlock.h> #include <asm/smp.h> +#include <asm/io.h> struct mpic_msgr { u32 __iomem *base; diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 2d7bb8ced136..e4897523de41 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -83,11 +83,10 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) return 0; } - if ((tbl->it_offset + tbl->it_size) > (mask >> IOMMU_PAGE_SHIFT)) { - dev_info(dev, "Warning: IOMMU window too big for device mask\n"); - dev_info(dev, "mask: 0x%08llx, table end: 0x%08lx\n", - mask, (tbl->it_offset + tbl->it_size) << - IOMMU_PAGE_SHIFT); + if (tbl->it_offset > (mask >> IOMMU_PAGE_SHIFT)) { + dev_info(dev, "Warning: IOMMU offset too big for device mask\n"); + dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n", + mask, tbl->it_offset << IOMMU_PAGE_SHIFT); return 0; } else return 1; diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index f3a82dde61db..956a4c496de9 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -253,7 +253,7 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) /* Do not emulate user-space instructions, instead single-step them */ if (user_mode(regs)) { - bp->ctx->task->thread.last_hit_ubp = bp; + current->thread.last_hit_ubp = bp; regs->msr |= MSR_SE; goto out; } diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index 782bd0a3c2f0..c470a40b29f5 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -25,6 +25,7 @@ #include <asm/processor.h> #include <asm/machdep.h> #include <asm/debug.h> +#include <linux/slab.h> /* * This table contains the mapping between PowerPC 
hardware trap types, and @@ -101,6 +102,21 @@ static int computeSignal(unsigned int tt) return SIGHUP; /* default for things we don't know about */ } +/** + * + * kgdb_skipexception - Bail out of KGDB when we've been triggered. + * @exception: Exception vector number + * @regs: Current &struct pt_regs. + * + * On some architectures we need to skip a breakpoint exception when + * it occurs after a breakpoint has been removed. + * + */ +int kgdb_skipexception(int exception, struct pt_regs *regs) +{ + return kgdb_isremovedbreak(regs->nip); +} + static int kgdb_call_nmi_hook(struct pt_regs *regs) { kgdb_nmicallback(raw_smp_processor_id(), regs); @@ -138,6 +154,8 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs) static int kgdb_singlestep(struct pt_regs *regs) { struct thread_info *thread_info, *exception_thread_info; + struct thread_info *backup_current_thread_info = \ + (struct thread_info *)kmalloc(sizeof(struct thread_info), GFP_KERNEL); if (user_mode(regs)) return 0; @@ -155,13 +173,17 @@ static int kgdb_singlestep(struct pt_regs *regs) thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1)); exception_thread_info = current_thread_info(); - if (thread_info != exception_thread_info) + if (thread_info != exception_thread_info) { + /* Save the original current_thread_info. */ + memcpy(backup_current_thread_info, exception_thread_info, sizeof *thread_info); memcpy(exception_thread_info, thread_info, sizeof *thread_info); + } kgdb_handle_exception(0, SIGTRAP, 0, regs); if (thread_info != exception_thread_info) - memcpy(thread_info, exception_thread_info, sizeof *thread_info); + /* Restore current_thread_info lastly. 
*/ + memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info); return 1; } @@ -410,7 +432,6 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code, #else linux_regs->msr |= MSR_SE; #endif - kgdb_single_step = 1; atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); } diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index f2496f2faecc..4e3cc47f26b9 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -107,11 +107,11 @@ long ppc64_personality(unsigned long personality) long ret; if (personality(current->personality) == PER_LINUX32 - && personality == PER_LINUX) - personality = PER_LINUX32; + && personality(personality) == PER_LINUX) + personality = (personality & ~PER_MASK) | PER_LINUX32; ret = sys_personality(personality); - if (ret == PER_LINUX32) - ret = PER_LINUX; + if (personality(ret) == PER_LINUX32) + ret = (ret & ~PER_MASK) | PER_LINUX; return ret; } #endif diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index f922c29bb234..837f13e7b6bf 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -211,6 +211,9 @@ next_pteg: pteg1 |= PP_RWRX; } + if (orig_pte->may_execute) + kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT); + local_irq_disable(); if (pteg[rr]) { diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 10fc8ec9d2a8..0688b6b39585 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -126,6 +126,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) if (!orig_pte->may_execute) rflags |= HPTE_R_N; + else + kvmppc_mmu_flush_icache(hpaddr >> PAGE_SHIFT); hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 5a84c8d3d040..44b72feaff7d 100644 --- 
a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1421,13 +1421,13 @@ _GLOBAL(kvmppc_h_cede) sync /* order setting ceded vs. testing prodded */ lbz r5,VCPU_PRODDED(r3) cmpwi r5,0 - bne 1f + bne kvm_cede_prodded li r0,0 /* set trap to 0 to say hcall is handled */ stw r0,VCPU_TRAP(r3) li r0,H_SUCCESS std r0,VCPU_GPR(R3)(r3) BEGIN_FTR_SECTION - b 2f /* just send it up to host on 970 */ + b kvm_cede_exit /* just send it up to host on 970 */ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) /* @@ -1446,7 +1446,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) or r4,r4,r0 PPC_POPCNTW(R7,R4) cmpw r7,r8 - bge 2f + bge kvm_cede_exit stwcx. r4,0,r6 bne 31b li r0,1 @@ -1555,7 +1555,8 @@ kvm_end_cede: b hcall_real_fallback /* cede when already previously prodded case */ -1: li r0,0 +kvm_cede_prodded: + li r0,0 stb r0,VCPU_PRODDED(r3) sync /* order testing prodded vs. clearing ceded */ stb r0,VCPU_CEDED(r3) @@ -1563,7 +1564,8 @@ kvm_end_cede: blr /* we've ceded but we want to give control to the host */ -2: li r3,H_TOO_HARD +kvm_cede_exit: + li r3,H_TOO_HARD blr secondary_too_late: diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index c510fc961302..a2b66717813d 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -322,11 +322,11 @@ static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) { if (vcpu_e500->g2h_tlb1_map) - memset(vcpu_e500->g2h_tlb1_map, - sizeof(u64) * vcpu_e500->gtlb_params[1].entries, 0); + memset(vcpu_e500->g2h_tlb1_map, 0, + sizeof(u64) * vcpu_e500->gtlb_params[1].entries); if (vcpu_e500->h2g_tlb1_rmap) - memset(vcpu_e500->h2g_tlb1_rmap, - sizeof(unsigned int) * host_tlb_params[1].entries, 0); + memset(vcpu_e500->h2g_tlb1_rmap, 0, + sizeof(unsigned int) * host_tlb_params[1].entries); } static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) @@ -539,6 +539,9 @@ static inline void 
kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, ref, gvaddr, stlbe); + + /* Clear i-cache for new pages */ + kvmppc_mmu_flush_icache(pfn); } /* XXX only map the one-one case, for now use TLB0 */ diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index f9ede7c6606e..0d24ff15f5f6 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -288,7 +288,7 @@ err1; stb r0,0(r3) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) bl .enter_vmx_usercopy - cmpwi r3,0 + cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) ld r3,STACKFRAMESIZE+48(r1) ld r4,STACKFRAMESIZE+56(r1) @@ -326,38 +326,7 @@ err1; stb r0,0(r3) dcbt r0,r8,0b01010 /* GO */ .machine pop - /* - * We prefetch both the source and destination using enhanced touch - * instructions. We use a stream ID of 0 for the load side and - * 1 for the store side. - */ - clrrdi r6,r4,7 - clrrdi r9,r3,7 - ori r9,r9,1 /* stream=1 */ - - srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */ - cmpldi cr1,r7,0x3FF - ble cr1,1f - li r7,0x3FF -1: lis r0,0x0E00 /* depth=7 */ - sldi r7,r7,7 - or r7,r7,r0 - ori r10,r7,1 /* stream=1 */ - - lis r8,0x8000 /* GO=1 */ - clrldi r8,r8,32 - -.machine push -.machine "power4" - dcbt r0,r6,0b01000 - dcbt r0,r7,0b01010 - dcbtst r0,r9,0b01000 - dcbtst r0,r10,0b01010 - eieio - dcbt r0,r8,0b01010 /* GO */ -.machine pop - - beq .Lunwind_stack_nonvmx_copy + beq cr1,.Lunwind_stack_nonvmx_copy /* * If source and destination are not relatively aligned we use a diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 0efdc51bc716..7ba6c96de778 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -222,7 +222,7 @@ _GLOBAL(memcpy_power7) std r0,16(r1) stdu r1,-STACKFRAMESIZE(r1) bl .enter_vmx_copy - cmpwi r3,0 + cmpwi cr1,r3,0 ld r0,STACKFRAMESIZE+16(r1) ld r3,STACKFRAMESIZE+48(r1) ld r4,STACKFRAMESIZE+56(r1) @@ -260,7 +260,7 
@@ _GLOBAL(memcpy_power7) dcbt r0,r8,0b01010 /* GO */ .machine pop - beq .Lunwind_stack_nonvmx_copy + beq cr1,.Lunwind_stack_nonvmx_copy /* * If source and destination are not relatively aligned we use a diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index baaafde7d135..fbdad0e3929a 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -469,6 +469,7 @@ void flush_dcache_icache_page(struct page *page) __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT); #endif } +EXPORT_SYMBOL(flush_dcache_icache_page); void clear_user_page(void *page, unsigned long vaddr, struct page *pg) { diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 77b49ddda9d3..7cd2dbd6e4c4 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1431,7 +1431,7 @@ static void perf_event_interrupt(struct pt_regs *regs) if (!event->hw.idx || is_limited_pmc(event->hw.idx)) continue; val = read_pmc(event->hw.idx); - if ((int)val < 0) { + if (pmc_overflow(val)) { /* event has overflowed */ found = 1; record_and_restart(event, val, regs); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index a7b2a600d0a4..c37f46136321 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -465,7 +465,7 @@ int __init fsl_add_bridge(struct device_node *dev, int is_primary) iounmap(hose->cfg_data); iounmap(hose->cfg_addr); pcibios_free_controller(hose); - return 0; + return -ENODEV; } setup_pci_cmd(hose); @@ -827,6 +827,7 @@ struct device_node *fsl_pci_primary; void __devinit fsl_pci_init(void) { + int ret; struct device_node *node; struct pci_controller *hose; dma_addr_t max = 0xffffffff; @@ -855,10 +856,12 @@ void __devinit fsl_pci_init(void) if (!fsl_pci_primary) fsl_pci_primary = node; - fsl_add_bridge(node, fsl_pci_primary == node); - hose = pci_find_hose_for_OF_device(node); - max = min(max, hose->dma_window_base_cur + - hose->dma_window_size); + ret = fsl_add_bridge(node, 
fsl_pci_primary == node); + if (ret == 0) { + hose = pci_find_hose_for_OF_device(node); + max = min(max, hose->dma_window_base_cur + + hose->dma_window_size); + } } } diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index 483d8fa72e8b..e961f8c4a8f0 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -14,6 +14,9 @@ #include <linux/list.h> #include <linux/of_platform.h> #include <linux/errno.h> +#include <linux/err.h> +#include <linux/export.h> +#include <linux/slab.h> #include <asm/prom.h> #include <asm/hw_irq.h> #include <asm/ppc-pci.h> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index eab3492a45c5..9b49c65ee7a4 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -17,6 +17,7 @@ #include <linux/reboot.h> #include <linux/delay.h> #include <linux/kallsyms.h> +#include <linux/kmsg_dump.h> #include <linux/cpumask.h> #include <linux/export.h> #include <linux/sysrq.h> @@ -894,13 +895,13 @@ cmds(struct pt_regs *excp) #endif default: printf("Unrecognized command: "); - do { + do { if (' ' < cmd && cmd <= '~') putchar(cmd); else printf("\\x%x", cmd); cmd = inchar(); - } while (cmd != '\n'); + } while (cmd != '\n'); printf(" (type ? 
for help)\n"); break; } @@ -1097,7 +1098,7 @@ static long check_bp_loc(unsigned long addr) return 1; } -static char *breakpoint_help_string = +static char *breakpoint_help_string = "Breakpoint command usage:\n" "b show breakpoints\n" "b <addr> [cnt] set breakpoint at given instr addr\n" @@ -1193,7 +1194,7 @@ bpt_cmds(void) default: termch = cmd; - cmd = skipbl(); + cmd = skipbl(); if (cmd == '?') { printf(breakpoint_help_string); break; @@ -1359,7 +1360,7 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr, sp + REGS_OFFSET); break; } - printf("--- Exception: %lx %s at ", regs.trap, + printf("--- Exception: %lx %s at ", regs.trap, getvecname(TRAP(&regs))); pc = regs.nip; lr = regs.link; @@ -1623,14 +1624,14 @@ static void super_regs(void) cmd = skipbl(); if (cmd == '\n') { - unsigned long sp, toc; + unsigned long sp, toc; asm("mr %0,1" : "=r" (sp) :); asm("mr %0,2" : "=r" (toc) :); printf("msr = "REG" sprg0= "REG"\n", mfmsr(), mfspr(SPRN_SPRG0)); printf("pvr = "REG" sprg1= "REG"\n", - mfspr(SPRN_PVR), mfspr(SPRN_SPRG1)); + mfspr(SPRN_PVR), mfspr(SPRN_SPRG1)); printf("dec = "REG" sprg2= "REG"\n", mfspr(SPRN_DEC), mfspr(SPRN_SPRG2)); printf("sp = "REG" sprg3= "REG"\n", sp, mfspr(SPRN_SPRG3)); @@ -1783,7 +1784,7 @@ byterev(unsigned char *val, int size) static int brev; static int mnoread; -static char *memex_help_string = +static char *memex_help_string = "Memory examine command usage:\n" "m [addr] [flags] examine/change memory\n" " addr is optional. 
will start where left off.\n" @@ -1798,7 +1799,7 @@ static char *memex_help_string = "NOTE: flags are saved as defaults\n" ""; -static char *memex_subcmd_help_string = +static char *memex_subcmd_help_string = "Memory examine subcommands:\n" " hexval write this val to current location\n" " 'string' write chars from string to this location\n" @@ -2064,7 +2065,7 @@ prdump(unsigned long adrs, long ndump) nr = mread(adrs, temp, r); adrs += nr; for (m = 0; m < r; ++m) { - if ((m & (sizeof(long) - 1)) == 0 && m > 0) + if ((m & (sizeof(long) - 1)) == 0 && m > 0) putchar(' '); if (m < nr) printf("%.2x", temp[m]); @@ -2072,7 +2073,7 @@ prdump(unsigned long adrs, long ndump) printf("%s", fault_chars[fault_type]); } for (; m < 16; ++m) { - if ((m & (sizeof(long) - 1)) == 0) + if ((m & (sizeof(long) - 1)) == 0) putchar(' '); printf(" "); } @@ -2148,45 +2149,28 @@ print_address(unsigned long addr) void dump_log_buf(void) { - const unsigned long size = 128; - unsigned long end, addr; - unsigned char buf[size + 1]; - - addr = 0; - buf[size] = '\0'; - - if (setjmp(bus_error_jmp) != 0) { - printf("Unable to lookup symbol __log_buf!\n"); - return; - } - - catch_memory_errors = 1; - sync(); - addr = kallsyms_lookup_name("__log_buf"); - - if (! addr) - printf("Symbol __log_buf not found!\n"); - else { - end = addr + (1 << CONFIG_LOG_BUF_SHIFT); - while (addr < end) { - if (! 
mread(addr, buf, size)) { - printf("Can't read memory at address 0x%lx\n", addr); - break; - } - - printf("%s", buf); - - if (strlen(buf) < size) - break; - - addr += size; - } - } - - sync(); - /* wait a little while to see if we get a machine check */ - __delay(200); - catch_memory_errors = 0; + struct kmsg_dumper dumper = { .active = 1 }; + unsigned char buf[128]; + size_t len; + + if (setjmp(bus_error_jmp) != 0) { + printf("Error dumping printk buffer!\n"); + return; + } + + catch_memory_errors = 1; + sync(); + + kmsg_dump_rewind_nolock(&dumper); + while (kmsg_dump_get_line_nolock(&dumper, false, buf, sizeof(buf), &len)) { + buf[len] = '\0'; + printf("%s", buf); + } + + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + catch_memory_errors = 0; } /* diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8ec3a1aa4abd..28dd891a0a16 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -36,6 +36,7 @@ config X86 select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FENTRY if X86_64 select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER @@ -60,6 +61,8 @@ config X86 select HAVE_MIXED_BREAKPOINTS_REGS select PERF_EVENTS select HAVE_PERF_EVENTS_NMI + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select ANON_INODES select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386 select HAVE_CMPXCHG_LOCAL if !M386 diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index b0767bc08740..9a25b522d377 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -3,38 +3,54 @@ #ifdef __ASSEMBLY__ - .macro MCOUNT_SAVE_FRAME - /* taken from glibc */ - subq $0x38, %rsp - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) + /* skip is set if the stack was already partially adjusted */ + .macro MCOUNT_SAVE_FRAME skip=0 + /* + * We add enough stack 
to save all regs. + */ + subq $(SS+8-\skip), %rsp + movq %rax, RAX(%rsp) + movq %rcx, RCX(%rsp) + movq %rdx, RDX(%rsp) + movq %rsi, RSI(%rsp) + movq %rdi, RDI(%rsp) + movq %r8, R8(%rsp) + movq %r9, R9(%rsp) + /* Move RIP to its proper location */ + movq SS+8(%rsp), %rdx + movq %rdx, RIP(%rsp) .endm - .macro MCOUNT_RESTORE_FRAME - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $0x38, %rsp + .macro MCOUNT_RESTORE_FRAME skip=0 + movq R9(%rsp), %r9 + movq R8(%rsp), %r8 + movq RDI(%rsp), %rdi + movq RSI(%rsp), %rsi + movq RDX(%rsp), %rdx + movq RCX(%rsp), %rcx + movq RAX(%rsp), %rax + addq $(SS+8-\skip), %rsp .endm #endif #ifdef CONFIG_FUNCTION_TRACER -#define MCOUNT_ADDR ((long)(mcount)) +#ifdef CC_USING_FENTRY +# define MCOUNT_ADDR ((long)(__fentry__)) +#else +# define MCOUNT_ADDR ((long)(mcount)) +#endif #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ +#ifdef CONFIG_DYNAMIC_FTRACE +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#define ARCH_SUPPORTS_FTRACE_SAVE_REGS +#endif + #ifndef __ASSEMBLY__ extern void mcount(void); extern atomic_t modifying_ftrace_code; +extern void __fentry__(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 547882539157..d3ddd17405d0 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -27,6 +27,7 @@ #include <asm/insn.h> #define __ARCH_WANT_KPROBES_INSN_SLOT +#define ARCH_SUPPORTS_KPROBES_ON_FTRACE struct pt_regs; struct kprobe; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index cb4e43bce98a..4fabcdf1cfa7 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -262,4 +262,6 @@ static inline void perf_check_microcode(void) { } static inline void amd_pmu_disable_virt(void) { } #endif +#define arch_perf_out_copy_user copy_from_user_nmi + 
#endif /* _ASM_X86_PERF_EVENT_H */ diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/asm/perf_regs.h new file mode 100644 index 000000000000..3f2207bfd17b --- /dev/null +++ b/arch/x86/include/asm/perf_regs.h @@ -0,0 +1,33 @@ +#ifndef _ASM_X86_PERF_REGS_H +#define _ASM_X86_PERF_REGS_H + +enum perf_event_x86_regs { + PERF_REG_X86_AX, + PERF_REG_X86_BX, + PERF_REG_X86_CX, + PERF_REG_X86_DX, + PERF_REG_X86_SI, + PERF_REG_X86_DI, + PERF_REG_X86_BP, + PERF_REG_X86_SP, + PERF_REG_X86_IP, + PERF_REG_X86_FLAGS, + PERF_REG_X86_CS, + PERF_REG_X86_SS, + PERF_REG_X86_DS, + PERF_REG_X86_ES, + PERF_REG_X86_FS, + PERF_REG_X86_GS, + PERF_REG_X86_R8, + PERF_REG_X86_R9, + PERF_REG_X86_R10, + PERF_REG_X86_R11, + PERF_REG_X86_R12, + PERF_REG_X86_R13, + PERF_REG_X86_R14, + PERF_REG_X86_R15, + + PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1, + PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1, +}; +#endif /* _ASM_X86_PERF_REGS_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index b315a33867f2..33692eaabab5 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -12,8 +12,7 @@ * Simple spin lock operations. There are two variants, one clears IRQ's * on the local processor, one does not. * - * These are fair FIFO ticket locks, which are currently limited to 256 - * CPUs. + * These are fair FIFO ticket locks, which support up to 2^16 CPUs. 
* * (the type definitions are in asm/spinlock_types.h) */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8215e5652d97..8d7a619718b5 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_OF) += devicetree.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_PERF_EVENTS) += perf_regs.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index afb7ff79a29f..ced4534baed5 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -165,7 +165,7 @@ static const unsigned char * const k7_nops[ASM_NOP_MAX+2] = #endif #ifdef P6_NOP1 -static const unsigned char __initconst_or_module p6nops[] = +static const unsigned char p6nops[] = { P6_NOP1, P6_NOP2, diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 623f28837476..061ac17ee974 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1109,17 +1109,21 @@ ENTRY(ftrace_caller) pushl %eax pushl %ecx pushl %edx - movl 0xc(%esp), %eax + pushl $0 /* Pass NULL as regs pointer */ + movl 4*4(%esp), %eax movl 0x4(%ebp), %edx + leal function_trace_op, %ecx subl $MCOUNT_INSN_SIZE, %eax .globl ftrace_call ftrace_call: call ftrace_stub + addl $4,%esp /* skip NULL pointer */ popl %edx popl %ecx popl %eax +ftrace_ret: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: @@ -1131,6 +1135,72 @@ ftrace_stub: ret END(ftrace_caller) +ENTRY(ftrace_regs_caller) + pushf /* push flags before compare (in cs location) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* + * i386 does not save SS and ESP when coming from kernel. + * Instead, to get sp, &regs->sp is used (see ptrace.h). + * Unfortunately, that means eflags must be at the same location + * as the current return ip is. 
We move the return ip into the + * ip location, and move flags into the return ip location. + */ + pushl 4(%esp) /* save return ip into ip slot */ + subl $MCOUNT_INSN_SIZE, (%esp) /* Adjust ip */ + + pushl $0 /* Load 0 into orig_ax */ + pushl %gs + pushl %fs + pushl %es + pushl %ds + pushl %eax + pushl %ebp + pushl %edi + pushl %esi + pushl %edx + pushl %ecx + pushl %ebx + + movl 13*4(%esp), %eax /* Get the saved flags */ + movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */ + /* clobbering return ip */ + movl $__KERNEL_CS,13*4(%esp) + + movl 12*4(%esp), %eax /* Load ip (1st parameter) */ + movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */ + leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */ + pushl %esp /* Save pt_regs as 4th parameter */ + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + addl $4, %esp /* Skip pt_regs */ + movl 14*4(%esp), %eax /* Move flags back into cs */ + movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */ + movl 12*4(%esp), %eax /* Get return ip from regs->ip */ + addl $MCOUNT_INSN_SIZE, %eax + movl %eax, 14*4(%esp) /* Put return ip back for ret */ + + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %eax + popl %ds + popl %es + popl %fs + popl %gs + addl $8, %esp /* Skip orig_ax and ip */ + popf /* Pop flags at end (no addl to corrupt flags) */ + jmp ftrace_ret + +ftrace_restore_flags: + popf + jmp ftrace_stub #else /* ! 
CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) @@ -1171,9 +1241,6 @@ END(mcount) #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - pushl %eax pushl %ecx pushl %edx diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 69babd8c834f..ed767b747fe5 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -68,25 +68,51 @@ .section .entry.text, "ax" #ifdef CONFIG_FUNCTION_TRACER + +#ifdef CC_USING_FENTRY +# define function_hook __fentry__ +#else +# define function_hook mcount +#endif + #ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(mcount) + +ENTRY(function_hook) retq -END(mcount) +END(function_hook) + +/* skip is set if stack has been adjusted */ +.macro ftrace_caller_setup skip=0 + MCOUNT_SAVE_FRAME \skip + + /* Load the ftrace_ops into the 3rd parameter */ + leaq function_trace_op, %rdx + + /* Load ip into the first parameter */ + movq RIP(%rsp), %rdi + subq $MCOUNT_INSN_SIZE, %rdi + /* Load the parent_ip into the second parameter */ +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else + movq 8(%rbp), %rsi +#endif +.endm ENTRY(ftrace_caller) + /* Check if tracing was disabled (quick check) */ cmpl $0, function_trace_stop jne ftrace_stub - MCOUNT_SAVE_FRAME - - movq 0x38(%rsp), %rdi - movq 8(%rbp), %rsi - subq $MCOUNT_INSN_SIZE, %rdi + ftrace_caller_setup + /* regs go into 4th parameter (but make it NULL) */ + movq $0, %rcx GLOBAL(ftrace_call) call ftrace_stub MCOUNT_RESTORE_FRAME +ftrace_return: #ifdef CONFIG_FUNCTION_GRAPH_TRACER GLOBAL(ftrace_graph_call) @@ -97,8 +123,74 @@ GLOBAL(ftrace_stub) retq END(ftrace_caller) +ENTRY(ftrace_regs_caller) + /* Save the current flags before compare (in SS location)*/ + pushfq + + /* Check if tracing was disabled (quick check) */ + cmpl $0, function_trace_stop + jne ftrace_restore_flags + + /* skip=8 to skip flags saved in SS */ + ftrace_caller_setup 8 + + /* Save the rest of pt_regs */ + movq %r15, R15(%rsp) + movq %r14, R14(%rsp) + movq 
%r13, R13(%rsp) + movq %r12, R12(%rsp) + movq %r11, R11(%rsp) + movq %r10, R10(%rsp) + movq %rbp, RBP(%rsp) + movq %rbx, RBX(%rsp) + /* Copy saved flags */ + movq SS(%rsp), %rcx + movq %rcx, EFLAGS(%rsp) + /* Kernel segments */ + movq $__KERNEL_DS, %rcx + movq %rcx, SS(%rsp) + movq $__KERNEL_CS, %rcx + movq %rcx, CS(%rsp) + /* Stack - skipping return address */ + leaq SS+16(%rsp), %rcx + movq %rcx, RSP(%rsp) + + /* regs go into 4th parameter */ + leaq (%rsp), %rcx + +GLOBAL(ftrace_regs_call) + call ftrace_stub + + /* Copy flags back to SS, to restore them */ + movq EFLAGS(%rsp), %rax + movq %rax, SS(%rsp) + + /* restore the rest of pt_regs */ + movq R15(%rsp), %r15 + movq R14(%rsp), %r14 + movq R13(%rsp), %r13 + movq R12(%rsp), %r12 + movq R10(%rsp), %r10 + movq RBP(%rsp), %rbp + movq RBX(%rsp), %rbx + + /* skip=8 to skip flags saved in SS */ + MCOUNT_RESTORE_FRAME 8 + + /* Restore flags */ + popfq + + jmp ftrace_return +ftrace_restore_flags: + popfq + jmp ftrace_stub + +END(ftrace_regs_caller) + + #else /* ! 
CONFIG_DYNAMIC_FTRACE */ -ENTRY(mcount) + +ENTRY(function_hook) cmpl $0, function_trace_stop jne ftrace_stub @@ -119,8 +211,12 @@ GLOBAL(ftrace_stub) trace: MCOUNT_SAVE_FRAME - movq 0x38(%rsp), %rdi + movq RIP(%rsp), %rdi +#ifdef CC_USING_FENTRY + movq SS+16(%rsp), %rsi +#else movq 8(%rbp), %rsi +#endif subq $MCOUNT_INSN_SIZE, %rdi call *ftrace_trace_function @@ -128,20 +224,22 @@ trace: MCOUNT_RESTORE_FRAME jmp ftrace_stub -END(mcount) +END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) - cmpl $0, function_trace_stop - jne ftrace_stub - MCOUNT_SAVE_FRAME +#ifdef CC_USING_FENTRY + leaq SS+16(%rsp), %rdi + movq $0, %rdx /* No framepointers needed */ +#else leaq 8(%rbp), %rdi - movq 0x38(%rsp), %rsi movq (%rbp), %rdx +#endif + movq RIP(%rsp), %rsi subq $MCOUNT_INSN_SIZE, %rsi call prepare_ftrace_return diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index c3a7cb4bf6e6..1d414029f1d8 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -206,6 +206,21 @@ static int ftrace_modify_code(unsigned long ip, unsigned const char *old_code, unsigned const char *new_code); +/* + * Should never be called: + * As it is only called by __ftrace_replace_code() which is called by + * ftrace_replace_code() that x86 overrides, and by ftrace_update_code() + * which is called to turn mcount into nops or nops into function calls + * but not to convert a function from not using regs to one that uses + * regs, which ftrace_modify_call() is for. 
+ */ +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + WARN_ON(1); + return -EINVAL; +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -220,6 +235,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ret = ftrace_modify_code(ip, old, new); + /* Also update the regs callback function */ + if (!ret) { + ip = (unsigned long)(&ftrace_regs_call); + memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE); + new = ftrace_call_replace(ip, (unsigned long)func); + ret = ftrace_modify_code(ip, old, new); + } + atomic_dec(&modifying_ftrace_code); return ret; @@ -299,6 +322,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec) return add_break(rec->ip, old); } +/* + * If the record has the FTRACE_FL_REGS set, that means that it + * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS + * is not not set, then it wants to convert to the normal callback. + */ +static unsigned long get_ftrace_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + +/* + * The FTRACE_FL_REGS_EN is set when the record already points to + * a function that saves all the regs. Basically the '_EN' version + * represents the current state of the function. 
+ */ +static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec) +{ + if (rec->flags & FTRACE_FL_REGS_EN) + return (unsigned long)FTRACE_REGS_ADDR; + else + return (unsigned long)FTRACE_ADDR; +} + static int add_breakpoints(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_addr; @@ -306,7 +355,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: @@ -316,6 +365,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable) /* converting nop to call */ return add_brk_on_nop(rec); + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: + ftrace_addr = get_ftrace_old_addr(rec); + /* fall through */ case FTRACE_UPDATE_MAKE_NOP: /* converting a call to a nop */ return add_brk_on_call(rec, ftrace_addr); @@ -360,13 +413,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec) * If not, don't touch the breakpoint, we make just create * a disaster. 
*/ - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); + nop = ftrace_call_replace(ip, ftrace_addr); + + if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0) + goto update; + + /* Check both ftrace_addr and ftrace_old_addr */ + ftrace_addr = get_ftrace_old_addr(rec); nop = ftrace_call_replace(ip, ftrace_addr); if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0) return -EINVAL; } + update: return probe_kernel_write((void *)ip, &nop[0], 1); } @@ -405,12 +466,14 @@ static int add_update(struct dyn_ftrace *rec, int enable) ret = ftrace_test_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return add_update_call(rec, ftrace_addr); @@ -455,12 +518,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable) ret = ftrace_update_record(rec, enable); - ftrace_addr = (unsigned long)FTRACE_ADDR; + ftrace_addr = get_ftrace_addr(rec); switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: case FTRACE_UPDATE_MAKE_CALL: /* converting nop to call */ return finish_update_call(rec, ftrace_addr); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 7ad683d78645..d44f7829968e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -270,7 +270,7 @@ void fixup_irqs(void) if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { break_affinity = 1; - affinity = cpu_all_mask; + affinity = cpu_online_mask; } chip = irq_data_get_irq_chip(data); diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index e2f751efb7b1..47ae1023a93c 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1052,6 +1052,54 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 
0; } +#ifdef KPROBES_CAN_USE_FTRACE +/* Ftrace callback handler for kprobes */ +void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + regs->ip += sizeof(kprobe_opcode_t); + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (p->pre_handler) + p->pre_handler(p, regs); + + if (unlikely(p->post_handler)) { + /* Emulate singlestep as if there is a 5byte nop */ + regs->ip = ip + MCOUNT_INSN_SIZE; + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); + regs->ip = ip; /* Recover for next callback */ + } +end: + local_irq_restore(flags); +} + +int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} +#endif + int __init arch_init_kprobes(void) { return arch_init_optprobes(); diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 8a2ce8fd41c0..82746f942cd8 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -143,11 +143,12 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr, unsigned int *current_size) { struct microcode_header_amd *mc_hdr; - unsigned int actual_size; + unsigned int actual_size, patch_size; u16 equiv_cpu_id; /* size of the current patch we're staring at */ - *current_size = *(u32 *)(ucode_ptr + 4) + SECTION_HDR_SIZE; + patch_size = *(u32 *)(ucode_ptr + 4); + *current_size = patch_size + SECTION_HDR_SIZE; equiv_cpu_id = find_equiv_id(); if (!equiv_cpu_id) @@ -174,7 +175,7 @@ 
static int get_matching_microcode(int cpu, const u8 *ucode_ptr, /* * now that the header looks sane, verify its size */ - actual_size = verify_ucode_size(cpu, *current_size, leftover_size); + actual_size = verify_ucode_size(cpu, patch_size, leftover_size); if (!actual_size) return 0; diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 4873e62db6a1..9e5bcf1e2376 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -225,6 +225,9 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, if (do_microcode_update(buf, len) == 0) ret = (ssize_t)len; + if (ret > 0) + perf_check_microcode(); + mutex_unlock(&microcode_mutex); put_online_cpus(); diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c new file mode 100644 index 000000000000..c5a3e5cfe07f --- /dev/null +++ b/arch/x86/kernel/perf_regs.c @@ -0,0 +1,105 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/bug.h> +#include <linux/stddef.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +#ifdef CONFIG_X86_32 +#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX +#else +#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX +#endif + +#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r) + +static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = { + PT_REGS_OFFSET(PERF_REG_X86_AX, ax), + PT_REGS_OFFSET(PERF_REG_X86_BX, bx), + PT_REGS_OFFSET(PERF_REG_X86_CX, cx), + PT_REGS_OFFSET(PERF_REG_X86_DX, dx), + PT_REGS_OFFSET(PERF_REG_X86_SI, si), + PT_REGS_OFFSET(PERF_REG_X86_DI, di), + PT_REGS_OFFSET(PERF_REG_X86_BP, bp), + PT_REGS_OFFSET(PERF_REG_X86_SP, sp), + PT_REGS_OFFSET(PERF_REG_X86_IP, ip), + PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags), + PT_REGS_OFFSET(PERF_REG_X86_CS, cs), + PT_REGS_OFFSET(PERF_REG_X86_SS, ss), +#ifdef CONFIG_X86_32 + PT_REGS_OFFSET(PERF_REG_X86_DS, ds), + PT_REGS_OFFSET(PERF_REG_X86_ES, es), + PT_REGS_OFFSET(PERF_REG_X86_FS, 
fs), + PT_REGS_OFFSET(PERF_REG_X86_GS, gs), +#else + /* + * The pt_regs struct does not store + * ds, es, fs, gs in 64 bit mode. + */ + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, + (unsigned int) -1, +#endif +#ifdef CONFIG_X86_64 + PT_REGS_OFFSET(PERF_REG_X86_R8, r8), + PT_REGS_OFFSET(PERF_REG_X86_R9, r9), + PT_REGS_OFFSET(PERF_REG_X86_R10, r10), + PT_REGS_OFFSET(PERF_REG_X86_R11, r11), + PT_REGS_OFFSET(PERF_REG_X86_R12, r12), + PT_REGS_OFFSET(PERF_REG_X86_R13, r13), + PT_REGS_OFFSET(PERF_REG_X86_R14, r14), + PT_REGS_OFFSET(PERF_REG_X86_R15, r15), +#endif +}; + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE(idx > ARRAY_SIZE(pt_regs_offset))) + return 0; + + return regs_get_register(regs, pt_regs_offset[idx]); +} + +#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL)) + +#ifdef CONFIG_X86_32 +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_32; +} +#else /* CONFIG_X86_64 */ +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ + (1ULL << PERF_REG_X86_ES) | \ + (1ULL << PERF_REG_X86_FS) | \ + (1ULL << PERF_REG_X86_GS)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + if (mask & REG_NOSUPPORT) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_IA32)) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} +#endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 6020f6f5927c..1330dd102950 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -13,9 +13,13 @@ #include <asm/ftrace.h> #ifdef CONFIG_FUNCTION_TRACER -/* mcount is defined in assembly */ +/* mcount and __fentry__ are defined in assembly */ +#ifdef CC_USING_FENTRY +EXPORT_SYMBOL(__fentry__); +#else 
EXPORT_SYMBOL(mcount); #endif +#endif EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 97d9a9914ba8..a3b57a27be88 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -475,13 +475,26 @@ register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg) return address_mask(ctxt, reg); } +static void masked_increment(ulong *reg, ulong mask, int inc) +{ + assign_masked(reg, *reg + inc, mask); +} + static inline void register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc) { + ulong mask; + if (ctxt->ad_bytes == sizeof(unsigned long)) - *reg += inc; + mask = ~0UL; else - *reg = (*reg & ~ad_mask(ctxt)) | ((*reg + inc) & ad_mask(ctxt)); + mask = ad_mask(ctxt); + masked_increment(reg, mask, inc); +} + +static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc) +{ + masked_increment(&ctxt->regs[VCPU_REGS_RSP], stack_mask(ctxt), inc); } static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) @@ -1522,8 +1535,8 @@ static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes) { struct segmented_address addr; - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -bytes); - addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]); + rsp_increment(ctxt, -bytes); + addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt); addr.seg = VCPU_SREG_SS; return segmented_write(ctxt, addr, data, bytes); @@ -1542,13 +1555,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, int rc; struct segmented_address addr; - addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]); + addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt); addr.seg = VCPU_SREG_SS; rc = segmented_read(ctxt, addr, dest, len); if (rc != X86EMUL_CONTINUE) return rc; - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], len); + rsp_increment(ctxt, len); return rc; } @@ -1688,8 +1701,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) while (reg >= 
VCPU_REGS_RAX) { if (reg == VCPU_REGS_RSP) { - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], - ctxt->op_bytes); + rsp_increment(ctxt, ctxt->op_bytes); --reg; } @@ -2825,7 +2837,7 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], ctxt->src.val); + rsp_increment(ctxt, ctxt->src.val); return X86EMUL_CONTINUE; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 01ca00423938..7fbd0d273ea8 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4113,16 +4113,21 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc) LIST_HEAD(invalid_list); /* + * Never scan more than sc->nr_to_scan VM instances. + * Will not hit this condition practically since we do not try + * to shrink more than one VM and it is very unlikely to see + * !n_used_mmu_pages so many times. + */ + if (!nr_to_scan--) + break; + /* * n_used_mmu_pages is accessed without holding kvm->mmu_lock * here. We may skip a VM instance errorneosly, but we do not * want to shrink a VM that only started to populate its MMU * anyway. */ - if (kvm->arch.n_used_mmu_pages > 0) { - if (!nr_to_scan--) - break; + if (!kvm->arch.n_used_mmu_pages) continue; - } idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 42bce48f6928..dce75b760312 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -806,7 +806,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); * kvm-specific. Those are put in the beginning of the list. 
*/ -#define KVM_SAVE_MSRS_BEGIN 9 +#define KVM_SAVE_MSRS_BEGIN 10 static u32 msrs_to_save[] = { MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bf4bda6d3e9a..9642d4a38602 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -31,7 +31,6 @@ #include <linux/pci.h> #include <linux/gfp.h> #include <linux/memblock.h> -#include <linux/syscore_ops.h> #include <xen/xen.h> #include <xen/interface/xen.h> @@ -1470,130 +1469,38 @@ asmlinkage void __init xen_start_kernel(void) #endif } -#ifdef CONFIG_XEN_PVHVM -/* - * The pfn containing the shared_info is located somewhere in RAM. This - * will cause trouble if the current kernel is doing a kexec boot into a - * new kernel. The new kernel (and its startup code) can not know where - * the pfn is, so it can not reserve the page. The hypervisor will - * continue to update the pfn, and as a result memory corruption occours - * in the new kernel. - * - * One way to work around this issue is to allocate a page in the - * xen-platform pci device's BAR memory range. But pci init is done very - * late and the shared_info page is already in use very early to read - * the pvclock. So moving the pfn from RAM to MMIO is racy because some - * code paths on other vcpus could access the pfn during the small - * window when the old pfn is moved to the new pfn. There is even a - * small window were the old pfn is not backed by a mfn, and during that - * time all reads return -1. - * - * Because it is not known upfront where the MMIO region is located it - * can not be used right from the start in xen_hvm_init_shared_info. - * - * To minimise trouble the move of the pfn is done shortly before kexec. - * This does not eliminate the race because all vcpus are still online - * when the syscore_ops will be called. But hopefully there is no work - * pending at this point in time. 
Also the syscore_op is run last which - * reduces the risk further. - */ - -static struct shared_info *xen_hvm_shared_info; - -static void xen_hvm_connect_shared_info(unsigned long pfn) +void __ref xen_hvm_init_shared_info(void) { + int cpu; struct xen_add_to_physmap xatp; + static struct shared_info *shared_info_page = 0; + if (!shared_info_page) + shared_info_page = (struct shared_info *) + extend_brk(PAGE_SIZE, PAGE_SIZE); xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = pfn; + xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); -} -static void xen_hvm_set_shared_info(struct shared_info *sip) -{ - int cpu; - - HYPERVISOR_shared_info = sip; + HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info * page, we use it in the event channel upcall and in some pvclock * related functions. We don't need the vcpu_info placement * optimizations because we don't use any pv_mmu or pv_irq op on * HVM. - * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is - * online but xen_hvm_set_shared_info is run at resume time too and + * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is + * online but xen_hvm_init_shared_info is run at resume time too and * in that case multiple vcpus might be online. 
*/ for_each_online_cpu(cpu) { per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; } } -/* Reconnect the shared_info pfn to a mfn */ -void xen_hvm_resume_shared_info(void) -{ - xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); -} - -#ifdef CONFIG_KEXEC -static struct shared_info *xen_hvm_shared_info_kexec; -static unsigned long xen_hvm_shared_info_pfn_kexec; - -/* Remember a pfn in MMIO space for kexec reboot */ -void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn) -{ - xen_hvm_shared_info_kexec = sip; - xen_hvm_shared_info_pfn_kexec = pfn; -} - -static void xen_hvm_syscore_shutdown(void) -{ - struct xen_memory_reservation reservation = { - .domid = DOMID_SELF, - .nr_extents = 1, - }; - unsigned long prev_pfn; - int rc; - - if (!xen_hvm_shared_info_kexec) - return; - - prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT; - set_xen_guest_handle(reservation.extent_start, &prev_pfn); - - /* Move pfn to MMIO, disconnects previous pfn from mfn */ - xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec); - - /* Update pointers, following hypercall is also a memory barrier */ - xen_hvm_set_shared_info(xen_hvm_shared_info_kexec); - - /* Allocate new mfn for previous pfn */ - do { - rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); - if (rc == 0) - msleep(123); - } while (rc == 0); - - /* Make sure the previous pfn is really connected to a (new) mfn */ - BUG_ON(rc != 1); -} - -static struct syscore_ops xen_hvm_syscore_ops = { - .shutdown = xen_hvm_syscore_shutdown, -}; -#endif - -/* Use a pfn in RAM, may move to MMIO before kexec. 
*/ -static void __init xen_hvm_init_shared_info(void) -{ - /* Remember pointer for resume */ - xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); - xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); - xen_hvm_set_shared_info(xen_hvm_shared_info); -} - +#ifdef CONFIG_XEN_PVHVM static void __init init_hvm_pv_info(void) { int major, minor; @@ -1644,9 +1551,6 @@ static void __init xen_hvm_guest_init(void) init_hvm_pv_info(); xen_hvm_init_shared_info(); -#ifdef CONFIG_KEXEC - register_syscore_ops(&xen_hvm_syscore_ops); -#endif if (xen_feature(XENFEAT_hvm_callback_vector)) xen_have_vector_callback = 1; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index b2e91d40a4cb..d4b255463253 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -196,9 +196,11 @@ RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); /* When we populate back during bootup, the amount of pages can vary. The * max we have is seen is 395979, but that does not mean it can't be more. - * But some machines can have 3GB I/O holes even. So lets reserve enough - * for 4GB of I/O and E820 holes. */ -RESERVE_BRK(p2m_populated, PMD_SIZE * 4); + * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle + * it can re-use Xen provided mfn_list array, so we only need to allocate at + * most three P2M top nodes. */ +RESERVE_BRK(p2m_populated, PAGE_SIZE * 3); + static inline unsigned p2m_top_index(unsigned long pfn) { BUG_ON(pfn >= MAX_P2M_PFN); @@ -575,12 +577,99 @@ static bool __init early_alloc_p2m(unsigned long pfn) } return true; } + +/* + * Skim over the P2M tree looking at pages that are either filled with + * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and + * replace the P2M leaf with a p2m_missing or p2m_identity. + * Stick the old page in the new P2M tree location. 
+ */ +bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn) +{ + unsigned topidx; + unsigned mididx; + unsigned ident_pfns; + unsigned inv_pfns; + unsigned long *p2m; + unsigned long *mid_mfn_p; + unsigned idx; + unsigned long pfn; + + /* We only look when this entails a P2M middle layer */ + if (p2m_index(set_pfn)) + return false; + + for (pfn = 0; pfn <= MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { + topidx = p2m_top_index(pfn); + + if (!p2m_top[topidx]) + continue; + + if (p2m_top[topidx] == p2m_mid_missing) + continue; + + mididx = p2m_mid_index(pfn); + p2m = p2m_top[topidx][mididx]; + if (!p2m) + continue; + + if ((p2m == p2m_missing) || (p2m == p2m_identity)) + continue; + + if ((unsigned long)p2m == INVALID_P2M_ENTRY) + continue; + + ident_pfns = 0; + inv_pfns = 0; + for (idx = 0; idx < P2M_PER_PAGE; idx++) { + /* IDENTITY_PFNs are 1:1 */ + if (p2m[idx] == IDENTITY_FRAME(pfn + idx)) + ident_pfns++; + else if (p2m[idx] == INVALID_P2M_ENTRY) + inv_pfns++; + else + break; + } + if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE)) + goto found; + } + return false; +found: + /* Found one, replace old with p2m_identity or p2m_missing */ + p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); + /* And the other for save/restore.. */ + mid_mfn_p = p2m_top_mfn_p[topidx]; + /* NOTE: Even if it is a p2m_identity it should still be point to + * a page filled with INVALID_P2M_ENTRY entries. */ + mid_mfn_p[mididx] = virt_to_mfn(p2m_missing); + + /* Reset where we want to stick the old page in. 
*/ + topidx = p2m_top_index(set_pfn); + mididx = p2m_mid_index(set_pfn); + + /* This shouldn't happen */ + if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) + early_alloc_p2m(set_pfn); + + if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) + return false; + + p2m_init(p2m); + p2m_top[topidx][mididx] = p2m; + mid_mfn_p = p2m_top_mfn_p[topidx]; + mid_mfn_p[mididx] = virt_to_mfn(p2m); + + return true; +} bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) { if (unlikely(!__set_phys_to_machine(pfn, mfn))) { if (!early_alloc_p2m(pfn)) return false; + if (early_can_reuse_p2m_middle(pfn, mfn)) + return __set_phys_to_machine(pfn, mfn); + if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/)) return false; diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index ead85576d54a..d11ca11d14fc 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -78,9 +78,16 @@ static void __init xen_add_extra_mem(u64 start, u64 size) memblock_reserve(start, size); xen_max_p2m_pfn = PFN_DOWN(start + size); + for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { + unsigned long mfn = pfn_to_mfn(pfn); + + if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) + continue; + WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", + pfn, mfn); - for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++) __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + } } static unsigned long __init xen_do_chunk(unsigned long start, diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index ae8a00c39de4..45329c8c226e 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) { #ifdef CONFIG_XEN_PVHVM int cpu; - xen_hvm_resume_shared_info(); + xen_hvm_init_shared_info(); xen_callback_vector(); xen_unplug_emulated_devices(); if (xen_feature(XENFEAT_hvm_safe_pvclock)) { diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h 
index 1e4329e04e0f..202d4c150154 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -41,7 +41,7 @@ void xen_enable_syscall(void); void xen_vcpu_restore(void); void xen_callback_vector(void); -void xen_hvm_resume_shared_info(void); +void xen_hvm_init_shared_info(void); void xen_unplug_emulated_devices(void); void __init xen_build_dynamic_phys_to_machine(void); diff --git a/block/blk-lib.c b/block/blk-lib.c index 2b461b496a78..19cc761cacb2 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -44,6 +44,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, struct request_queue *q = bdev_get_queue(bdev); int type = REQ_WRITE | REQ_DISCARD; unsigned int max_discard_sectors; + unsigned int granularity, alignment, mask; struct bio_batch bb; struct bio *bio; int ret = 0; @@ -54,18 +55,20 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (!blk_queue_discard(q)) return -EOPNOTSUPP; + /* Zero-sector (unknown) and one-sector granularities are the same. */ + granularity = max(q->limits.discard_granularity >> 9, 1U); + mask = granularity - 1; + alignment = (bdev_discard_alignment(bdev) >> 9) & mask; + /* * Ensure that max_discard_sectors is of the proper - * granularity + * granularity, so that requests stay aligned after a split. */ max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); + max_discard_sectors = round_down(max_discard_sectors, granularity); if (unlikely(!max_discard_sectors)) { /* Avoid infinite loop below. Being cautious never hurts. 
*/ return -EOPNOTSUPP; - } else if (q->limits.discard_granularity) { - unsigned int disc_sects = q->limits.discard_granularity >> 9; - - max_discard_sectors &= ~(disc_sects - 1); } if (flags & BLKDEV_DISCARD_SECURE) { @@ -79,25 +82,37 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, bb.wait = &wait; while (nr_sects) { + unsigned int req_sects; + sector_t end_sect; + bio = bio_alloc(gfp_mask, 1); if (!bio) { ret = -ENOMEM; break; } + req_sects = min_t(sector_t, nr_sects, max_discard_sectors); + + /* + * If splitting a request, and the next starting sector would be + * misaligned, stop the discard at the previous aligned sector. + */ + end_sect = sector + req_sects; + if (req_sects < nr_sects && (end_sect & mask) != alignment) { + end_sect = + round_down(end_sect - alignment, granularity) + + alignment; + req_sects = end_sect - sector; + } + bio->bi_sector = sector; bio->bi_end_io = bio_batch_end_io; bio->bi_bdev = bdev; bio->bi_private = &bb; - if (nr_sects > max_discard_sectors) { - bio->bi_size = max_discard_sectors << 9; - nr_sects -= max_discard_sectors; - sector += max_discard_sectors; - } else { - bio->bi_size = nr_sects << 9; - nr_sects = 0; - } + bio->bi_size = req_sects << 9; + nr_sects -= req_sects; + sector = end_sect; atomic_inc(&bb.done); submit_bio(type, bio); diff --git a/block/blk-merge.c b/block/blk-merge.c index 160035f54882..e76279e41162 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -110,6 +110,49 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, return 0; } +static void +__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, + struct scatterlist *sglist, struct bio_vec **bvprv, + struct scatterlist **sg, int *nsegs, int *cluster) +{ + + int nbytes = bvec->bv_len; + + if (*bvprv && *cluster) { + if ((*sg)->length + nbytes > queue_max_segment_size(q)) + goto new_segment; + + if (!BIOVEC_PHYS_MERGEABLE(*bvprv, bvec)) + goto new_segment; + if (!BIOVEC_SEG_BOUNDARY(q, 
*bvprv, bvec)) + goto new_segment; + + (*sg)->length += nbytes; + } else { +new_segment: + if (!*sg) + *sg = sglist; + else { + /* + * If the driver previously mapped a shorter + * list, we could see a termination bit + * prematurely unless it fully inits the sg + * table on each mapping. We KNOW that there + * must be more entries here or the driver + * would be buggy, so force clear the + * termination bit to avoid doing a full + * sg_init_table() in drivers for each command. + */ + (*sg)->page_link &= ~0x02; + *sg = sg_next(*sg); + } + + sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset); + (*nsegs)++; + } + *bvprv = bvec; +} + /* * map a request to scatterlist, return number of sg entries setup. Caller * must make sure sg can hold rq->nr_phys_segments entries @@ -131,41 +174,8 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq, bvprv = NULL; sg = NULL; rq_for_each_segment(bvec, rq, iter) { - int nbytes = bvec->bv_len; - - if (bvprv && cluster) { - if (sg->length + nbytes > queue_max_segment_size(q)) - goto new_segment; - - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) - goto new_segment; - - sg->length += nbytes; - } else { -new_segment: - if (!sg) - sg = sglist; - else { - /* - * If the driver previously mapped a shorter - * list, we could see a termination bit - * prematurely unless it fully inits the sg - * table on each mapping. We KNOW that there - * must be more entries here or the driver - * would be buggy, so force clear the - * termination bit to avoid doing a full - * sg_init_table() in drivers for each command. 
- */ - sg->page_link &= ~0x02; - sg = sg_next(sg); - } - - sg_set_page(sg, bvec->bv_page, nbytes, bvec->bv_offset); - nsegs++; - } - bvprv = bvec; + __blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg, + &nsegs, &cluster); } /* segments in rq */ @@ -199,6 +209,43 @@ new_segment: } EXPORT_SYMBOL(blk_rq_map_sg); +/** + * blk_bio_map_sg - map a bio to a scatterlist + * @q: request_queue in question + * @bio: bio being mapped + * @sglist: scatterlist being mapped + * + * Note: + * Caller must make sure sg can hold bio->bi_phys_segments entries + * + * Will return the number of sg entries setup + */ +int blk_bio_map_sg(struct request_queue *q, struct bio *bio, + struct scatterlist *sglist) +{ + struct bio_vec *bvec, *bvprv; + struct scatterlist *sg; + int nsegs, cluster; + unsigned long i; + + nsegs = 0; + cluster = blk_queue_cluster(q); + + bvprv = NULL; + sg = NULL; + bio_for_each_segment(bvec, bio, i) { + __blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg, + &nsegs, &cluster); + } /* segments in bio */ + + if (sg) + sg_mark_end(sg); + + BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments); + return nsegs; +} +EXPORT_SYMBOL(blk_bio_map_sg); + static inline int ll_new_hw_segment(struct request_queue *q, struct request *req, struct bio *bio) diff --git a/block/genhd.c b/block/genhd.c index cac7366957c3..d839723303c8 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -835,7 +835,7 @@ static void disk_seqf_stop(struct seq_file *seqf, void *v) static void *show_partition_start(struct seq_file *seqf, loff_t *pos) { - static void *p; + void *p; p = disk_seqf_start(seqf, pos); if (!IS_ERR_OR_NULL(p) && !*pos) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 2be8ef1d3093..27cecd313e75 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -115,7 +115,7 @@ config SATA_SIL24 If unsure, say N. 
config ATA_SFF - bool "ATA SFF support" + bool "ATA SFF support (for legacy IDE and PATA)" default y help This option adds support for ATA controllers with SFF diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 062e6a1a248f..50d5dea0ff59 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -256,6 +256,14 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x8c07), board_ahci }, /* Lynx Point RAID */ { PCI_VDEVICE(INTEL, 0x8c0e), board_ahci }, /* Lynx Point RAID */ { PCI_VDEVICE(INTEL, 0x8c0f), board_ahci }, /* Lynx Point RAID */ + { PCI_VDEVICE(INTEL, 0x9c02), board_ahci }, /* Lynx Point-LP AHCI */ + { PCI_VDEVICE(INTEL, 0x9c03), board_ahci }, /* Lynx Point-LP AHCI */ + { PCI_VDEVICE(INTEL, 0x9c04), board_ahci }, /* Lynx Point-LP RAID */ + { PCI_VDEVICE(INTEL, 0x9c05), board_ahci }, /* Lynx Point-LP RAID */ + { PCI_VDEVICE(INTEL, 0x9c06), board_ahci }, /* Lynx Point-LP RAID */ + { PCI_VDEVICE(INTEL, 0x9c07), board_ahci }, /* Lynx Point-LP RAID */ + { PCI_VDEVICE(INTEL, 0x9c0e), board_ahci }, /* Lynx Point-LP RAID */ + { PCI_VDEVICE(INTEL, 0x9c0f), board_ahci }, /* Lynx Point-LP RAID */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index c2594ddf25b0..57eb1c212a4c 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -320,6 +320,7 @@ extern struct device_attribute *ahci_sdev_attrs[]; extern struct ata_port_operations ahci_ops; extern struct ata_port_operations ahci_pmp_retry_srst_ops; +unsigned int ahci_dev_classify(struct ata_port *ap); void ahci_fill_cmd_slot(struct ahci_port_priv *pp, unsigned int tag, u32 opts); void ahci_save_initial_config(struct device *dev, diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 3c809bfbccf5..ef773e12af79 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -329,6 +329,14 @@ static const struct pci_device_id piix_pci_tbl[] = { 
{ 0x8086, 0x8c08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, /* SATA Controller IDE (Lynx Point) */ { 0x8086, 0x8c09, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, + /* SATA Controller IDE (Lynx Point-LP) */ + { 0x8086, 0x9c00, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb }, + /* SATA Controller IDE (Lynx Point-LP) */ + { 0x8086, 0x9c01, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb }, + /* SATA Controller IDE (Lynx Point-LP) */ + { 0x8086, 0x9c08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, + /* SATA Controller IDE (Lynx Point-LP) */ + { 0x8086, 0x9c09, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, /* SATA Controller IDE (DH89xxCC) */ { 0x8086, 0x2326, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, { } /* terminate list */ diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index f9eaa82311a9..555c07afa05b 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -1139,7 +1139,7 @@ static void ahci_dev_config(struct ata_device *dev) } } -static unsigned int ahci_dev_classify(struct ata_port *ap) +unsigned int ahci_dev_classify(struct ata_port *ap) { void __iomem *port_mmio = ahci_port_base(ap); struct ata_taskfile tf; @@ -1153,6 +1153,7 @@ static unsigned int ahci_dev_classify(struct ata_port *ap) return ata_dev_classify(&tf); } +EXPORT_SYMBOL_GPL(ahci_dev_classify); void ahci_fill_cmd_slot(struct ahci_port_priv *pp, unsigned int tag, u32 opts) diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index 902b5a457170..fd9ecf74e631 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -60,17 +60,7 @@ acpi_handle ata_ap_acpi_handle(struct ata_port *ap) if (ap->flags & ATA_FLAG_ACPI_SATA) return NULL; - /* - * If acpi bind operation has already happened, we can get the handle - * for the port by checking the corresponding scsi_host device's - * firmware node, otherwise we will need to find out the handle from - * its parent's acpi node. 
- */ - if (ap->scsi_host) - return DEVICE_ACPI_HANDLE(&ap->scsi_host->shost_gendev); - else - return acpi_get_child(DEVICE_ACPI_HANDLE(ap->host->dev), - ap->port_no); + return acpi_get_child(DEVICE_ACPI_HANDLE(ap->host->dev), ap->port_no); } EXPORT_SYMBOL(ata_ap_acpi_handle); @@ -1101,6 +1091,9 @@ static int ata_acpi_bind_host(struct ata_port *ap, acpi_handle *handle) if (!*handle) return -ENODEV; + if (ata_acpi_gtm(ap, &ap->__acpi_init_gtm) == 0) + ap->pflags |= ATA_PFLAG_INIT_GTM_VALID; + return 0; } diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index fadd5866d40f..8e1039c8e159 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4062,7 +4062,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "_NEC DV5800A", NULL, ATA_HORKAGE_NODMA }, { "SAMSUNG CD-ROM SN-124", "N001", ATA_HORKAGE_NODMA }, { "Seagate STT20000A", NULL, ATA_HORKAGE_NODMA }, - { "2GB ATA Flash Disk", "ADMA428M", ATA_HORKAGE_NODMA }, + { " 2GB ATA Flash Disk", "ADMA428M", ATA_HORKAGE_NODMA }, /* Odd clown on sil3726/4726 PMPs */ { "Config Disk", NULL, ATA_HORKAGE_DISABLE }, @@ -4128,6 +4128,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Devices that do not need bridging limits applied */ { "MTRON MSP-SATA*", NULL, ATA_HORKAGE_BRIDGE_OK, }, + { "BUFFALO HD-QSU2/R5", NULL, ATA_HORKAGE_BRIDGE_OK, }, /* Devices which aren't very happy with higher link speeds */ { "WD My Book", NULL, ATA_HORKAGE_1_5_GBPS, }, diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c index 361c75cea57b..24e51056ac26 100644 --- a/drivers/ata/pata_atiixp.c +++ b/drivers/ata/pata_atiixp.c @@ -20,6 +20,7 @@ #include <linux/delay.h> #include <scsi/scsi_host.h> #include <linux/libata.h> +#include <linux/dmi.h> #define DRV_NAME "pata_atiixp" #define DRV_VERSION "0.4.6" @@ -33,11 +34,26 @@ enum { ATIIXP_IDE_UDMA_MODE = 0x56 }; +static const struct dmi_system_id attixp_cable_override_dmi_table[] = { + { + /* Board has onboard 
PATA<->SATA converters */ + .ident = "MSI E350DM-E33", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "MSI"), + DMI_MATCH(DMI_BOARD_NAME, "E350DM-E33(MS-7720)"), + }, + }, + { } +}; + static int atiixp_cable_detect(struct ata_port *ap) { struct pci_dev *pdev = to_pci_dev(ap->host->dev); u8 udma; + if (dmi_check_system(attixp_cable_override_dmi_table)) + return ATA_CBL_PATA40_SHORT; + /* Hack from drivers/ide/pci. Really we want to know how to do the raw detection not play follow the bios mode guess */ pci_read_config_byte(pdev, ATIIXP_IDE_UDMA_MODE + ap->port_no, &udma); diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index ba91b408abad..d84566496746 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -889,6 +889,7 @@ struct bm_aio_ctx { unsigned int done; unsigned flags; #define BM_AIO_COPY_PAGES 1 +#define BM_WRITE_ALL_PAGES 2 int error; struct kref kref; }; @@ -1059,7 +1060,8 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) break; if (rw & WRITE) { - if (bm_test_page_unchanged(b->bm_pages[i])) { + if (!(flags & BM_WRITE_ALL_PAGES) && + bm_test_page_unchanged(b->bm_pages[i])) { dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i); continue; } @@ -1141,6 +1143,17 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) } /** + * drbd_bm_write_all() - Write the whole bitmap to its on disk location. + * @mdev: DRBD device. + * + * Will write all pages. + */ +int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local) +{ + return bm_rw(mdev, WRITE, BM_WRITE_ALL_PAGES, 0); +} + +/** * drbd_bm_lazy_write_out() - Write bitmap pages 0 to @upper_idx-1, if they have changed. * @mdev: DRBD device. 
* @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b2ca143d0053..b953cc7c9c00 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1469,6 +1469,7 @@ extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); +extern int drbd_bm_write_all(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local); extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index dbe6135a2abe..f93a0320e952 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -79,6 +79,7 @@ static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); static void md_sync_timer_fn(unsigned long data); static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused); +static void _tl_clear(struct drbd_conf *mdev); MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " "Lars Ellenberg <lars@linbit.com>"); @@ -432,19 +433,10 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) /* Actions operating on the disk state, also want to work on requests that got barrier acked. 
*/ - switch (what) { - case fail_frozen_disk_io: - case restart_frozen_disk_io: - list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { - req = list_entry(le, struct drbd_request, tl_requests); - _req_mod(req, what); - } - case connection_lost_while_pending: - case resend: - break; - default: - dev_err(DEV, "what = %d in _tl_restart()\n", what); + list_for_each_safe(le, tle, &mdev->barrier_acked_requests) { + req = list_entry(le, struct drbd_request, tl_requests); + _req_mod(req, what); } } @@ -459,11 +451,16 @@ static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) */ void tl_clear(struct drbd_conf *mdev) { + spin_lock_irq(&mdev->req_lock); + _tl_clear(mdev); + spin_unlock_irq(&mdev->req_lock); +} + +static void _tl_clear(struct drbd_conf *mdev) +{ struct list_head *le, *tle; struct drbd_request *r; - spin_lock_irq(&mdev->req_lock); - _tl_restart(mdev, connection_lost_while_pending); /* we expect this list to be empty. */ @@ -482,7 +479,6 @@ void tl_clear(struct drbd_conf *mdev) memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); - spin_unlock_irq(&mdev->req_lock); } void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what) @@ -1476,12 +1472,12 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, if (ns.susp_fen) { /* case1: The outdate peer handler is successful: */ if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) { - tl_clear(mdev); if (test_bit(NEW_CUR_UUID, &mdev->flags)) { drbd_uuid_new_current(mdev); clear_bit(NEW_CUR_UUID, &mdev->flags); } spin_lock_irq(&mdev->req_lock); + _tl_clear(mdev); _drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL); spin_unlock_irq(&mdev->req_lock); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index fb9dce8daa24..edb490aad8b4 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -674,8 +674,8 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds la_size_changed && md_moved ? 
"size changed and md moved" : la_size_changed ? "size changed" : "md moved"); /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */ - err = drbd_bitmap_io(mdev, &drbd_bm_write, - "size changed", BM_LOCKED_MASK); + err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write, + "size changed", BM_LOCKED_MASK); if (err) { rv = dev_size_error; goto out; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 910335c30927..01b2ac641c7b 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -695,6 +695,12 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, break; case resend: + /* Simply complete (local only) READs. */ + if (!(req->rq_state & RQ_WRITE) && !req->w.cb) { + _req_may_be_done(req, m); + break; + } + /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK before the connection loss (B&C only); only P_BARRIER_ACK was missing. Trowing them out of the TL here by pretending we got a BARRIER_ACK @@ -834,7 +840,15 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns req->private_bio = NULL; } if (rw == WRITE) { - remote = 1; + /* Need to replicate writes. Unless it is an empty flush, + * which is better mapped to a DRBD P_BARRIER packet, + * also for drbd wire protocol compatibility reasons. */ + if (unlikely(size == 0)) { + /* The only size==0 bios we expect are empty flushes. */ + D_ASSERT(bio->bi_rw & REQ_FLUSH); + remote = 0; + } else + remote = 1; } else { /* READ || READA */ if (local) { @@ -870,8 +884,11 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, uns * extent. This waits for any resync activity in the corresponding * resync extent to finish, and, if necessary, pulls in the target * extent into the activity log, which involves further disk io because - * of transactional on-disk meta data updates. 
*/ - if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) { + * of transactional on-disk meta data updates. + * Empty flushes don't need to go into the activity log, they can only + * flush data for pending writes which are already in there. */ + if (rw == WRITE && local && size + && !test_bit(AL_SUSPENDED, &mdev->flags)) { req->rq_state |= RQ_IN_ACT_LOG; drbd_al_begin_io(mdev, sector); } @@ -994,7 +1011,10 @@ allocate_barrier: if (rw == WRITE && _req_conflicts(req)) goto fail_conflicting; - list_add_tail(&req->tl_requests, &mdev->newest_tle->requests); + /* no point in adding empty flushes to the transfer log, + * they are mapped to drbd barriers already. */ + if (likely(size!=0)) + list_add_tail(&req->tl_requests, &mdev->newest_tle->requests); /* NOTE remote first: to get the concurrent write detection right, * we must register the request before start of local IO. */ @@ -1014,6 +1034,14 @@ allocate_barrier: mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) maybe_pull_ahead(mdev); + /* If this was a flush, queue a drbd barrier/start a new epoch. + * Unless the current epoch was empty anyways, or we are not currently + * replicating, in which case there is no point. */ + if (unlikely(bio->bi_rw & REQ_FLUSH) + && mdev->newest_tle->n_writes + && drbd_should_do_remote(mdev->state)) + queue_barrier(mdev); + spin_unlock_irq(&mdev->req_lock); kfree(b); /* if someone else has beaten us to it... 
*/ diff --git a/drivers/cpufreq/omap-cpufreq.c b/drivers/cpufreq/omap-cpufreq.c index 17fa04d08be9..b47034e650a5 100644 --- a/drivers/cpufreq/omap-cpufreq.c +++ b/drivers/cpufreq/omap-cpufreq.c @@ -218,7 +218,7 @@ static int __cpuinit omap_cpu_init(struct cpufreq_policy *policy) policy->cur = policy->min = policy->max = omap_getspeed(policy->cpu); - if (atomic_inc_return(&freq_table_users) == 1) + if (!freq_table) result = opp_init_cpufreq_table(mpu_dev, &freq_table); if (result) { @@ -227,6 +227,8 @@ static int __cpuinit omap_cpu_init(struct cpufreq_policy *policy) goto fail_ck; } + atomic_inc_return(&freq_table_users); + result = cpufreq_frequency_table_cpuinfo(policy, freq_table); if (result) goto fail_table; diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 53c8c51d5881..93d14070141a 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -63,7 +63,7 @@ static void caam_jr_dequeue(unsigned long devarg) head = ACCESS_ONCE(jrp->head); - spin_lock_bh(&jrp->outlock); + spin_lock(&jrp->outlock); sw_idx = tail = jrp->tail; hw_idx = jrp->out_ring_read_index; @@ -115,7 +115,7 @@ static void caam_jr_dequeue(unsigned long devarg) jrp->tail = tail; } - spin_unlock_bh(&jrp->outlock); + spin_unlock(&jrp->outlock); /* Finally, execute user's callback */ usercall(dev, userdesc, userstatus, userarg); @@ -236,14 +236,14 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, return -EIO; } - spin_lock(&jrp->inplock); + spin_lock_bh(&jrp->inplock); head = jrp->head; tail = ACCESS_ONCE(jrp->tail); if (!rd_reg32(&jrp->rregs->inpring_avail) || CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) { - spin_unlock(&jrp->inplock); + spin_unlock_bh(&jrp->inplock); dma_unmap_single(dev, desc_dma, desc_size, DMA_TO_DEVICE); return -EBUSY; } @@ -265,7 +265,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, wr_reg32(&jrp->rregs->inpring_jobadd, 1); - spin_unlock(&jrp->inplock); + spin_unlock_bh(&jrp->inplock); return 0; } diff --git a/drivers/crypto/hifn_795x.c 
b/drivers/crypto/hifn_795x.c index c9c4befb5a8d..df14358d7fa1 100644 --- a/drivers/crypto/hifn_795x.c +++ b/drivers/crypto/hifn_795x.c @@ -821,8 +821,8 @@ static int hifn_register_rng(struct hifn_device *dev) /* * We must wait at least 256 Pk_clk cycles between two reads of the rng. */ - dev->rng_wait_time = DIV_ROUND_UP(NSEC_PER_SEC, dev->pk_clk_freq) * - 256; + dev->rng_wait_time = DIV_ROUND_UP_ULL(NSEC_PER_SEC, + dev->pk_clk_freq) * 256; dev->rng.name = dev->name; dev->rng.data_present = hifn_rng_data_present, diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 60ea284407ce..8bf8a64e5115 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1624,7 +1624,6 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ORTEK, USB_DEVICE_ID_ORTEK_WKB2000) }, { HID_USB_DEVICE(USB_VENDOR_ID_PETALYNX, USB_DEVICE_ID_PETALYNX_MAXTER_REMOTE) }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_KEYBOARD) }, - { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_PIXART_IMAGING_INC_OPTICAL_TOUCH_SCREEN) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONE) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ARVO) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_ISKU) }, diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 92406097efeb..8d1e32d7cd97 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -4,7 +4,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) { - ide_drive_t *drive = dev_get_drvdata(dev); + ide_drive_t *drive = to_ide_device(dev); ide_drive_t *pair = ide_get_pair_dev(drive); ide_hwif_t *hwif = drive->hwif; struct request *rq; @@ -40,7 +40,7 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) int generic_ide_resume(struct device *dev) { - ide_drive_t *drive = dev_get_drvdata(dev); + ide_drive_t *drive = to_ide_device(dev); ide_drive_t *pair = ide_get_pair_dev(drive); ide_hwif_t *hwif = 
drive->hwif; struct request *rq; diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c index ea0aaa3f13d0..a9f4049c6769 100644 --- a/drivers/spi/spi-bcm63xx.c +++ b/drivers/spi/spi-bcm63xx.c @@ -47,6 +47,8 @@ struct bcm63xx_spi { /* Platform data */ u32 speed_hz; unsigned fifo_size; + unsigned int msg_type_shift; + unsigned int msg_ctl_width; /* Data buffers */ const unsigned char *tx_ptr; @@ -221,13 +223,20 @@ static unsigned int bcm63xx_txrx_bufs(struct spi_device *spi, msg_ctl = (t->len << SPI_BYTE_CNT_SHIFT); if (t->rx_buf && t->tx_buf) - msg_ctl |= (SPI_FD_RW << SPI_MSG_TYPE_SHIFT); + msg_ctl |= (SPI_FD_RW << bs->msg_type_shift); else if (t->rx_buf) - msg_ctl |= (SPI_HD_R << SPI_MSG_TYPE_SHIFT); + msg_ctl |= (SPI_HD_R << bs->msg_type_shift); else if (t->tx_buf) - msg_ctl |= (SPI_HD_W << SPI_MSG_TYPE_SHIFT); - - bcm_spi_writew(bs, msg_ctl, SPI_MSG_CTL); + msg_ctl |= (SPI_HD_W << bs->msg_type_shift); + + switch (bs->msg_ctl_width) { + case 8: + bcm_spi_writeb(bs, msg_ctl, SPI_MSG_CTL); + break; + case 16: + bcm_spi_writew(bs, msg_ctl, SPI_MSG_CTL); + break; + } /* Issue the transfer */ cmd = SPI_CMD_START_IMMEDIATE; @@ -406,9 +415,21 @@ static int __devinit bcm63xx_spi_probe(struct platform_device *pdev) master->transfer_one_message = bcm63xx_spi_transfer_one; master->mode_bits = MODEBITS; bs->speed_hz = pdata->speed_hz; + bs->msg_type_shift = pdata->msg_type_shift; + bs->msg_ctl_width = pdata->msg_ctl_width; bs->tx_io = (u8 *)(bs->regs + bcm63xx_spireg(SPI_MSG_DATA)); bs->rx_io = (const u8 *)(bs->regs + bcm63xx_spireg(SPI_RX_DATA)); + switch (bs->msg_ctl_width) { + case 8: + case 16: + break; + default: + dev_err(dev, "unsupported MSG_CTL width: %d\n", + bs->msg_ctl_width); + goto out_clk_disable; + } + /* Initialize hardware */ clk_enable(bs->clk); bcm_spi_writeb(bs, SPI_INTR_CLEAR_ALL, SPI_INT_STATUS); diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c index 3fe82d0e8caa..5b06d31ab6a9 100644 --- 
a/drivers/watchdog/booke_wdt.c +++ b/drivers/watchdog/booke_wdt.c @@ -166,18 +166,17 @@ static long booke_wdt_ioctl(struct file *file, switch (cmd) { case WDIOC_GETSUPPORT: - if (copy_to_user((void *)arg, &ident, sizeof(ident))) - return -EFAULT; + return copy_to_user(p, &ident, sizeof(ident)) ? -EFAULT : 0; case WDIOC_GETSTATUS: return put_user(0, p); case WDIOC_GETBOOTSTATUS: /* XXX: something is clearing TSR */ tmp = mfspr(SPRN_TSR) & TSR_WRS(3); /* returns CARDRESET if last reset was caused by the WDT */ - return (tmp ? WDIOF_CARDRESET : 0); + return put_user((tmp ? WDIOF_CARDRESET : 0), p); case WDIOC_SETOPTIONS: if (get_user(tmp, p)) - return -EINVAL; + return -EFAULT; if (tmp == WDIOS_ENABLECARD) { booke_wdt_ping(); break; diff --git a/drivers/xen/platform-pci.c b/drivers/xen/platform-pci.c index d4c50d63acbc..97ca359ae2bd 100644 --- a/drivers/xen/platform-pci.c +++ b/drivers/xen/platform-pci.c @@ -101,19 +101,6 @@ static int platform_pci_resume(struct pci_dev *pdev) return 0; } -static void __devinit prepare_shared_info(void) -{ -#ifdef CONFIG_KEXEC - unsigned long addr; - struct shared_info *hvm_shared_info; - - addr = alloc_xen_mmio(PAGE_SIZE); - hvm_shared_info = ioremap(addr, PAGE_SIZE); - memset(hvm_shared_info, 0, PAGE_SIZE); - xen_hvm_prepare_kexec(hvm_shared_info, addr >> PAGE_SHIFT); -#endif -} - static int __devinit platform_pci_init(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -151,8 +138,6 @@ static int __devinit platform_pci_init(struct pci_dev *pdev, platform_mmio = mmio_addr; platform_mmiolen = mmio_len; - prepare_shared_info(); - if (!xen_have_vector_callback) { ret = xen_allocate_irq(pdev); if (ret) { @@ -73,7 +73,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) { unsigned int sz = sizeof(struct bio) + extra_size; struct kmem_cache *slab = NULL; - struct bio_slab *bslab; + struct bio_slab *bslab, *new_bio_slabs; unsigned int i, entry = -1; mutex_lock(&bio_slab_lock); @@ -97,11 +97,12 @@ static 
struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) if (bio_slab_nr == bio_slab_max && entry == -1) { bio_slab_max <<= 1; - bio_slabs = krealloc(bio_slabs, - bio_slab_max * sizeof(struct bio_slab), - GFP_KERNEL); - if (!bio_slabs) + new_bio_slabs = krealloc(bio_slabs, + bio_slab_max * sizeof(struct bio_slab), + GFP_KERNEL); + if (!new_bio_slabs) goto out_unlock; + bio_slabs = new_bio_slabs; } if (entry == -1) entry = bio_slab_nr++; diff --git a/fs/block_dev.c b/fs/block_dev.c index 1e519195d45b..38e721b35d45 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1578,10 +1578,12 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; + struct blk_plug plug; ssize_t ret; BUG_ON(iocb->ki_pos != pos); + blk_start_plug(&plug); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); if (ret > 0 || ret == -EIOCBQUEUED) { ssize_t err; @@ -1590,6 +1592,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0 && ret > 0) ret = err; } + blk_finish_plug(&plug); return ret; } EXPORT_SYMBOL_GPL(blkdev_aio_write); diff --git a/fs/buffer.c b/fs/buffer.c index 9f6d2e41281d..58e2e7b77372 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -914,7 +914,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head) /* * Initialise the state of a blockdev page's buffers. */ -static void +static sector_t init_page_buffers(struct page *page, struct block_device *bdev, sector_t block, int size) { @@ -936,33 +936,41 @@ init_page_buffers(struct page *page, struct block_device *bdev, block++; bh = bh->b_this_page; } while (bh != head); + + /* + * Caller needs to validate requested block against end of device. + */ + return end_block; } /* * Create the page-cache page that contains the requested block. * - * This is user purely for blockdev mappings. + * This is used purely for blockdev mappings. 
*/ -static struct page * +static int grow_dev_page(struct block_device *bdev, sector_t block, - pgoff_t index, int size) + pgoff_t index, int size, int sizebits) { struct inode *inode = bdev->bd_inode; struct page *page; struct buffer_head *bh; + sector_t end_block; + int ret = 0; /* Will call free_more_memory() */ page = find_or_create_page(inode->i_mapping, index, (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); if (!page) - return NULL; + return ret; BUG_ON(!PageLocked(page)); if (page_has_buffers(page)) { bh = page_buffers(page); if (bh->b_size == size) { - init_page_buffers(page, bdev, block, size); - return page; + end_block = init_page_buffers(page, bdev, + index << sizebits, size); + goto done; } if (!try_to_free_buffers(page)) goto failed; @@ -982,14 +990,14 @@ grow_dev_page(struct block_device *bdev, sector_t block, */ spin_lock(&inode->i_mapping->private_lock); link_dev_buffers(page, bh); - init_page_buffers(page, bdev, block, size); + end_block = init_page_buffers(page, bdev, index << sizebits, size); spin_unlock(&inode->i_mapping->private_lock); - return page; - +done: + ret = (block < end_block) ? 1 : -ENXIO; failed: unlock_page(page); page_cache_release(page); - return NULL; + return ret; } /* @@ -999,7 +1007,6 @@ failed: static int grow_buffers(struct block_device *bdev, sector_t block, int size) { - struct page *page; pgoff_t index; int sizebits; @@ -1023,22 +1030,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) bdevname(bdev, b)); return -EIO; } - block = index << sizebits; + /* Create a page with the proper size buffers.. 
*/ - page = grow_dev_page(bdev, block, index, size); - if (!page) - return 0; - unlock_page(page); - page_cache_release(page); - return 1; + return grow_dev_page(bdev, block, index, size, sizebits); } static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { - int ret; - struct buffer_head *bh; - /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { @@ -1051,21 +1050,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) return NULL; } -retry: - bh = __find_get_block(bdev, block, size); - if (bh) - return bh; + for (;;) { + struct buffer_head *bh; + int ret; - ret = grow_buffers(bdev, block, size); - if (ret == 0) { - free_more_memory(); - goto retry; - } else if (ret > 0) { bh = __find_get_block(bdev, block, size); if (bh) return bh; + + ret = grow_buffers(bdev, block, size); + if (ret < 0) + return NULL; + if (ret == 0) + free_more_memory(); } - return NULL; } /* @@ -1321,10 +1319,6 @@ EXPORT_SYMBOL(__find_get_block); * which corresponds to the passed block_device, block and size. The * returned buffer has its reference count incremented. * - * __getblk() cannot fail - it just keeps trying. If you pass it an - * illegal block number, __getblk() will happily return a buffer_head - * which represents the non-existent block. Very weird. - * * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() * attempt is failing. FIXME, perhaps? 
*/ diff --git a/fs/direct-io.c b/fs/direct-io.c index 1faf4cb56f39..f86c720dba0e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1062,6 +1062,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, unsigned long user_addr; size_t bytes; struct buffer_head map_bh = { 0, }; + struct blk_plug plug; if (rw & WRITE) rw = WRITE_ODIRECT; @@ -1177,6 +1178,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, PAGE_SIZE - user_addr / PAGE_SIZE); } + blk_start_plug(&plug); + for (seg = 0; seg < nr_segs; seg++) { user_addr = (unsigned long)iov[seg].iov_base; sdio.size += bytes = iov[seg].iov_len; @@ -1235,6 +1238,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (sdio.bio) dio_bio_submit(dio, &sdio); + blk_finish_plug(&plug); + /* * It is possible that, we return short IO due to end of file. * In that case, we need to release all the pages we got hold on. diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 09357508ec9a..a2862339323b 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1113,6 +1113,11 @@ static void mark_journal_empty(journal_t *journal) BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); spin_lock(&journal->j_state_lock); + /* Is it already empty? 
*/ + if (sb->s_start == 0) { + spin_unlock(&journal->j_state_lock); + return; + } jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n", journal->j_tail_sequence); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index cbaf4f8bb7b7..4c7bd35b1876 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -651,12 +651,12 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c if (clp->cl_minorversion == 0) { if (!clp->cl_cred.cr_principal && - (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) + (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; args.client_name = clp->cl_cred.cr_principal; args.prognumber = conn->cb_prog, args.protocol = XPRT_TRANSPORT_TCP; - args.authflavor = clp->cl_flavor; + args.authflavor = clp->cl_cred.cr_flavor; clp->cl_cb_ident = conn->cb_ident; } else { if (!conn->cb_xprt) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e6173147f982..22bd0a66c356 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -231,7 +231,6 @@ struct nfs4_client { nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ - u32 cl_flavor; /* setclientid pseudoflavor */ struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 36a29b753c79..c495a3055e2a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1589,10 +1589,10 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) goto out; } - down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); for (cnt = 0; cnt < MAXQUOTAS; cnt++) warn[cnt].w_type = QUOTA_NL_NOWARN; + down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!dquots[cnt]) diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 4c0c7d163d15..a98b7740a0fc 100644 --- 
a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1334,9 +1334,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, else if (bitmap == 0) block = (REISERFS_DISK_OFFSET_IN_BYTES >> sb->s_blocksize_bits) + 1; - reiserfs_write_unlock(sb); bh = sb_bread(sb, block); - reiserfs_write_lock(sb); if (bh == NULL) reiserfs_warning(sb, "sh-2029: %s: bitmap block (#%u) " "reading failed", __func__, block); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index a6d4268fb6c1..855da58db145 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -76,10 +76,10 @@ void reiserfs_evict_inode(struct inode *inode) ; } out: + reiserfs_write_unlock_once(inode->i_sb, depth); clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */ dquot_drop(inode); inode->i_blocks = 0; - reiserfs_write_unlock_once(inode->i_sb, depth); return; no_delete: diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 8b8cc4e945f4..760de723dadb 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -167,7 +167,7 @@ struct ubifs_global_debug_info { #define ubifs_dbg_msg(type, fmt, ...) \ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) -#define DBG_KEY_BUF_LEN 32 +#define DBG_KEY_BUF_LEN 48 #define ubifs_dbg_msg_key(type, key, fmt, ...) 
do { \ char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index ce33b2beb151..8640920766ed 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -1749,7 +1749,10 @@ int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr) return 0; out_err: - ubifs_lpt_free(c, 0); + if (wr) + ubifs_lpt_free(c, 1); + if (rd) + ubifs_lpt_free(c, 0); return err; } diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index c30d976b4be8..edeec499c048 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -788,7 +788,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, corrupted_rescan: /* Re-scan the corrupted data with verbose messages */ - ubifs_err("corruptio %d", ret); + ubifs_err("corruption %d", ret); ubifs_scan_a_node(c, buf, len, lnum, offs, 1); corrupted: ubifs_scanned_corruption(c, lnum, offs, buf); diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index eba46d4a7619..94d78fc5d4e0 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -1026,7 +1026,6 @@ int ubifs_replay_journal(struct ubifs_info *c) c->replaying = 1; lnum = c->ltail_lnum = c->lhead_lnum; - lnum = UBIFS_LOG_LNUM; do { err = replay_log_leb(c, lnum, 0, c->sbuf); if (err == 1) @@ -1035,7 +1034,7 @@ int ubifs_replay_journal(struct ubifs_info *c) if (err) goto out; lnum = ubifs_next_log_lnum(c, lnum); - } while (lnum != UBIFS_LOG_LNUM); + } while (lnum != c->ltail_lnum); err = replay_buds(c); if (err) diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index c3fa6c5327a3..71a197f0f93d 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1157,9 +1157,6 @@ static int check_free_space(struct ubifs_info *c) * * This function mounts UBIFS file system. Returns zero in case of success and * a negative error code in case of failure. - * - * Note, the function does not de-allocate resources it it fails half way - * through, and the caller has to do this instead. 
*/ static int mount_ubifs(struct ubifs_info *c) { diff --git a/fs/udf/inode.c b/fs/udf/inode.c index fafaad795cd6..aa233469b3c1 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1124,14 +1124,17 @@ int udf_setsize(struct inode *inode, loff_t newsize) if (err) return err; down_write(&iinfo->i_data_sem); - } else + } else { iinfo->i_lenAlloc = newsize; + goto set_size; + } } err = udf_extend_file(inode, newsize); if (err) { up_write(&iinfo->i_data_sem); return err; } +set_size: truncate_setsize(inode, newsize); up_write(&iinfo->i_data_sem); } else { diff --git a/fs/udf/super.c b/fs/udf/super.c index dcbf98722afc..18fc038a438d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1344,6 +1344,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, udf_err(sb, "error loading logical volume descriptor: " "Partition table too long (%u > %lu)\n", table_len, sb->s_blocksize - sizeof(*lvd)); + ret = 1; goto out_bh; } @@ -1388,8 +1389,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { if (udf_load_sparable_map(sb, map, - (struct sparablePartitionMap *)gpm) < 0) + (struct sparablePartitionMap *)gpm) < 0) { + ret = 1; goto out_bh; + } } else if (!strncmp(upm2->partIdent.ident, UDF_ID_METADATA, strlen(UDF_ID_METADATA))) { @@ -2000,6 +2003,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (!silent) pr_notice("Rescanning with blocksize %d\n", UDF_DEFAULT_BLOCKSIZE); + brelse(sbi->s_lvid_bh); + sbi->s_lvid_bh = NULL; uopt.blocksize = UDF_DEFAULT_BLOCKSIZE; ret = udf_load_vrs(sb, &uopt, silent, &fileset); } diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index f9c3fe304a17..69cf4fcde03e 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -179,12 +179,14 @@ xfs_ioc_trim( * used by the fstrim application. In the end it really doesn't * matter as trimming blocks is an advisory interface. 
*/ + if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || + range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp))) + return -XFS_ERROR(EINVAL); + start = BTOBB(range.start); end = start + BTOBBT(range.len) - 1; minlen = BTOBB(max_t(u64, granularity, range.minlen)); - if (XFS_BB_TO_FSB(mp, start) >= mp->m_sb.sb_dblocks) - return -XFS_ERROR(EINVAL); if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1; diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 21e37b55f7e5..5aceb3f8ecd6 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -962,23 +962,22 @@ xfs_dialloc( if (!pag->pagi_freecount && !okalloc) goto nextag; + /* + * Then read in the AGI buffer and recheck with the AGI buffer + * lock held. + */ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); if (error) goto out_error; - /* - * Once the AGI has been read in we have to recheck - * pagi_freecount with the AGI buffer lock held. - */ if (pag->pagi_freecount) { xfs_perag_put(pag); goto out_alloc; } - if (!okalloc) { - xfs_trans_brelse(tp, agbp); - goto nextag; - } + if (!okalloc) + goto nextag_relse_buffer; + error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); if (error) { @@ -1007,6 +1006,8 @@ xfs_dialloc( return 0; } +nextag_relse_buffer: + xfs_trans_brelse(tp, agbp); nextag: xfs_perag_put(pag); if (++agno == mp->m_sb.sb_agcount) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 92d4331cd4f1..ca28a4ba4b54 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -857,7 +857,7 @@ xfs_rtbuf_get( xfs_buf_t *bp; /* block buffer, result */ xfs_inode_t *ip; /* bitmap or summary inode */ xfs_bmbt_irec_t map; - int nmap; + int nmap = 1; int error; /* error value */ ip = issum ? 
mp->m_rsumip : mp->m_rbmip; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4e72a9d48232..4a2ab7c85393 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -601,7 +601,7 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync) * it already be started by driver. */ #define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) + (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_DISCARD) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ (((rq)->cmd_flags & REQ_DISCARD) || \ @@ -894,6 +894,8 @@ extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); +extern int blk_bio_map_sg(struct request_queue *q, struct bio *bio, + struct scatterlist *sglist); extern void blk_dump_rq_flags(struct request *, char *); extern long nr_blockdev_pages(void); @@ -1139,6 +1141,16 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector & (lim->discard_granularity - 1); } +static inline int bdev_discard_alignment(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (bdev != bdev->bd_contains) + return bdev->bd_part->discard_alignment; + + return q->limits.discard_alignment; +} + static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) { if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 040b13b5c14a..279b1eaa8b73 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -194,6 +194,10 @@ static inline int cpuidle_play_dead(void) {return -ENODEV; } #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a); +#else +static 
inline void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a) +{ +} #endif /****************************** diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 55e6d63d46d0..a52f2f4fe030 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -10,6 +10,7 @@ #include <linux/kallsyms.h> #include <linux/linkage.h> #include <linux/bitops.h> +#include <linux/ptrace.h> #include <linux/ktime.h> #include <linux/sched.h> #include <linux/types.h> @@ -18,6 +19,28 @@ #include <asm/ftrace.h> +/* + * If the arch supports passing the variable contents of + * function_trace_op as the third parameter back from the + * mcount call, then the arch should define this as 1. + */ +#ifndef ARCH_SUPPORTS_FTRACE_OPS +#define ARCH_SUPPORTS_FTRACE_OPS 0 +#endif + +/* + * If the arch's mcount caller does not support all of ftrace's + * features, then it must call an indirect function that + * does. Or at least does enough to prevent any unwelcomed side effects. + */ +#if !defined(CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST) || \ + !ARCH_SUPPORTS_FTRACE_OPS +# define FTRACE_FORCE_LIST_FUNC 1 +#else +# define FTRACE_FORCE_LIST_FUNC 0 +#endif + + struct module; struct ftrace_hash; @@ -29,7 +52,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); +struct ftrace_ops; + +typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs); /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are @@ -45,12 +71,33 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); * could be controled by following calls: * ftrace_function_local_enable * ftrace_function_local_disable + * SAVE_REGS - The ftrace_ops wants regs saved at each function called + * and passed to the callback. 
If this flag is set, but the + * architecture does not support passing regs + * (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the + * ftrace_ops will fail to register, unless the next flag + * is set. + * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the + * handler can handle an arch that does not save regs + * (the handler tests if regs == NULL), then it can set + * this flag instead. It will not fail registering the ftrace_ops + * but, the regs field will be NULL if the arch does not support + * passing regs to the handler. + * Note, if this flag is set, the SAVE_REGS flag will automatically + * get set upon registering the ftrace_ops, if the arch supports it. + * RECURSION_SAFE - The ftrace_ops can set this to tell the ftrace infrastructure + * that the call back has its own recursion protection. If it does + * not set this, then the ftrace infrastructure will add recursion + * protection for the caller. */ enum { - FTRACE_OPS_FL_ENABLED = 1 << 0, - FTRACE_OPS_FL_GLOBAL = 1 << 1, - FTRACE_OPS_FL_DYNAMIC = 1 << 2, - FTRACE_OPS_FL_CONTROL = 1 << 3, + FTRACE_OPS_FL_ENABLED = 1 << 0, + FTRACE_OPS_FL_GLOBAL = 1 << 1, + FTRACE_OPS_FL_DYNAMIC = 1 << 2, + FTRACE_OPS_FL_CONTROL = 1 << 3, + FTRACE_OPS_FL_SAVE_REGS = 1 << 4, + FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 5, + FTRACE_OPS_FL_RECURSION_SAFE = 1 << 6, }; struct ftrace_ops { @@ -163,7 +210,8 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) return *this_cpu_ptr(ops->disabled); } -extern void ftrace_stub(unsigned long a0, unsigned long a1); +extern void ftrace_stub(unsigned long a0, unsigned long a1, + struct ftrace_ops *op, struct pt_regs *regs); #else /* !CONFIG_FUNCTION_TRACER */ /* @@ -172,6 +220,10 @@ extern void ftrace_stub(unsigned long a0, unsigned long a1); */ #define register_ftrace_function(ops) ({ 0; }) #define unregister_ftrace_function(ops) ({ 0; }) +static inline int ftrace_nr_registered_ops(void) +{ + return 0; +} static inline void 
clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_stop(void) { } @@ -227,12 +279,33 @@ extern void unregister_ftrace_function_probe_all(char *glob); extern int ftrace_text_reserved(void *start, void *end); +extern int ftrace_nr_registered_ops(void); + +/* + * The dyn_ftrace record's flags field is split into two parts. + * the first part which is '0-FTRACE_REF_MAX' is a counter of + * the number of callbacks that have registered the function that + * the dyn_ftrace descriptor represents. + * + * The second part is a mask: + * ENABLED - the function is being traced + * REGS - the record wants the function to save regs + * REGS_EN - the function is set up to save regs. + * + * When a new ftrace_ops is registered and wants a function to save + * pt_regs, the rec->flag REGS is set. When the function has been + * set up to save regs, the REG_EN flag is set. Once a function + * starts saving regs it will do so until all ftrace_ops are removed + * from tracing that function. + */ enum { - FTRACE_FL_ENABLED = (1 << 30), + FTRACE_FL_ENABLED = (1UL << 29), + FTRACE_FL_REGS = (1UL << 30), + FTRACE_FL_REGS_EN = (1UL << 31) }; -#define FTRACE_FL_MASK (0x3UL << 30) -#define FTRACE_REF_MAX ((1 << 30) - 1) +#define FTRACE_FL_MASK (0x7UL << 29) +#define FTRACE_REF_MAX ((1UL << 29) - 1) struct dyn_ftrace { union { @@ -244,6 +317,8 @@ struct dyn_ftrace { }; int ftrace_force_update(void); +int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, + int remove, int reset); int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, @@ -263,9 +338,23 @@ enum { FTRACE_STOP_FUNC_RET = (1 << 4), }; +/* + * The FTRACE_UPDATE_* enum is used to pass information back + * from the ftrace_update_record() and ftrace_test_record() + * functions. These are called by the code update routines + * to find out what is to be done for a given function. 
+ * + * IGNORE - The function is already what we want it to be + * MAKE_CALL - Start tracing the function + * MODIFY_CALL - Stop saving regs for the function + * MODIFY_CALL_REGS - Start saving regs for the function + * MAKE_NOP - Stop tracing the function + */ enum { FTRACE_UPDATE_IGNORE, FTRACE_UPDATE_MAKE_CALL, + FTRACE_UPDATE_MODIFY_CALL, + FTRACE_UPDATE_MODIFY_CALL_REGS, FTRACE_UPDATE_MAKE_NOP, }; @@ -317,7 +406,9 @@ extern int ftrace_dyn_arch_init(void *data); extern void ftrace_replace_code(int enable); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); +extern void ftrace_regs_caller(void); extern void ftrace_call(void); +extern void ftrace_regs_call(void); extern void mcount_call(void); void ftrace_modify_all_code(int command); @@ -325,6 +416,15 @@ void ftrace_modify_all_code(int command); #ifndef FTRACE_ADDR #define FTRACE_ADDR ((unsigned long)ftrace_caller) #endif + +#ifndef FTRACE_REGS_ADDR +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +# define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller) +#else +# define FTRACE_REGS_ADDR FTRACE_ADDR +#endif +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern void ftrace_graph_caller(void); extern int ftrace_enable_ftrace_graph_caller(void); @@ -380,6 +480,39 @@ extern int ftrace_make_nop(struct module *mod, */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS +/** + * ftrace_modify_call - convert from one addr to another (no nop) + * @rec: the mcount call site record + * @old_addr: the address expected to be currently called to + * @addr: the address to change to + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. 
+ * + * The code segment at @rec->ip should be a caller to @old_addr + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +extern int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr); +#else +/* Should never be called */ +static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + return -EINVAL; +} +#endif + /* May be defined in arch */ extern int ftrace_arch_read_dyn_info(char *buf, int size); @@ -387,7 +520,7 @@ extern int skip_trace(unsigned long ip); extern void ftrace_disable_daemon(void); extern void ftrace_enable_daemon(void); -#else +#else /* CONFIG_DYNAMIC_FTRACE */ static inline int skip_trace(unsigned long ip) { return 0; } static inline int ftrace_force_update(void) { return 0; } static inline void ftrace_disable_daemon(void) { } @@ -405,6 +538,10 @@ static inline int ftrace_text_reserved(void *start, void *end) { return 0; } +static inline unsigned long ftrace_location(unsigned long ip) +{ + return 0; +} /* * Again users of functions that have ftrace_ops may not @@ -413,6 +550,7 @@ static inline int ftrace_text_reserved(void *start, void *end) */ #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; }) #define ftrace_set_early_filter(ops, buf, enable) do { } while (0) +#define ftrace_set_filter_ip(ops, ip, remove, reset) ({ -ENODEV; }) #define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; }) #define ftrace_free_filter(ops) do { } while (0) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index b6e1f8c00577..23755ba42abc 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -38,6 +38,7 @@ #include <linux/spinlock.h> #include <linux/rcupdate.h> #include <linux/mutex.h> +#include 
<linux/ftrace.h> #ifdef CONFIG_KPROBES #include <asm/kprobes.h> @@ -48,14 +49,26 @@ #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 +/* + * If function tracer is enabled and the arch supports full + * passing of pt_regs to function tracing, then kprobes can + * optimize on top of function tracing. + */ +#if defined(CONFIG_FUNCTION_TRACER) && defined(ARCH_SUPPORTS_FTRACE_SAVE_REGS) \ + && defined(ARCH_SUPPORTS_KPROBES_ON_FTRACE) +# define KPROBES_CAN_USE_FTRACE +#endif + /* Attach to insert probes on any functions which should be ignored*/ #define __kprobes __attribute__((__section__(".kprobes.text"))) + #else /* CONFIG_KPROBES */ typedef int kprobe_opcode_t; struct arch_specific_insn { int dummy; }; #define __kprobes + #endif /* CONFIG_KPROBES */ struct kprobe; @@ -128,6 +141,7 @@ struct kprobe { * NOTE: * this flag is only for optimized_kprobe. */ +#define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */ /* Has this kprobe gone ? */ static inline int kprobe_gone(struct kprobe *p) @@ -146,6 +160,13 @@ static inline int kprobe_optimized(struct kprobe *p) { return p->flags & KPROBE_FLAG_OPTIMIZED; } + +/* Is this kprobe uses ftrace ? 
*/ +static inline int kprobe_ftrace(struct kprobe *p) +{ + return p->flags & KPROBE_FLAG_FTRACE; +} + /* * Special probe type that uses setjmp-longjmp type tricks to resume * execution at a specified entry with a matching prototype corresponding @@ -295,6 +316,12 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, #endif #endif /* CONFIG_OPTPROBES */ +#ifdef KPROBES_CAN_USE_FTRACE +extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs); +extern int arch_prepare_kprobe_ftrace(struct kprobe *p); +#endif + /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 603bec2913b0..06177ba10a16 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -58,13 +58,6 @@ union ktime { typedef union ktime ktime_t; /* Kill this */ -#define KTIME_MAX ((s64)~((u64)1 << 63)) -#if (BITS_PER_LONG == 64) -# define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) -#else -# define KTIME_SEC_MAX LONG_MAX -#endif - /* * ktime_t definitions when using the 64-bit scalar representation: */ diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h index 51bf8ada6dc0..49258e0ed1c6 100644 --- a/include/linux/mv643xx_eth.h +++ b/include/linux/mv643xx_eth.h @@ -15,6 +15,8 @@ #define MV643XX_ETH_SIZE_REG_4 0x2224 #define MV643XX_ETH_BASE_ADDR_ENABLE_REG 0x2290 +#define MV643XX_TX_CSUM_DEFAULT_LIMIT 0 + struct mv643xx_eth_shared_platform_data { struct mbus_dram_target_info *dram; struct platform_device *shared_smi; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 7602ccb3f40e..28f9cee3fbc3 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -130,8 +130,10 @@ enum perf_event_sample_format { PERF_SAMPLE_STREAM_ID = 1U << 9, PERF_SAMPLE_RAW = 1U << 10, PERF_SAMPLE_BRANCH_STACK = 1U << 11, + PERF_SAMPLE_REGS_USER = 1U << 12, + 
PERF_SAMPLE_STACK_USER = 1U << 13, - PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 14, /* non-ABI */ }; /* @@ -163,6 +165,15 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_HV) /* + * Values to determine ABI of the registers dump. + */ +enum perf_sample_regs_abi { + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, +}; + +/* * The format of the data returned by read() on a perf event fd, * as specified by attr.read_format: * @@ -194,6 +205,8 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ #define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ #define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ + /* add: sample_stack_user */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -255,7 +268,10 @@ struct perf_event_attr { exclude_host : 1, /* don't count in host */ exclude_guest : 1, /* don't count in guest */ - __reserved_1 : 43; + exclude_callchain_kernel : 1, /* exclude kernel callchains */ + exclude_callchain_user : 1, /* exclude user callchains */ + + __reserved_1 : 41; union { __u32 wakeup_events; /* wakeup every n events */ @@ -271,7 +287,21 @@ struct perf_event_attr { __u64 bp_len; __u64 config2; /* extension of config1 */ }; - __u64 branch_sample_type; /* enum branch_sample_type */ + __u64 branch_sample_type; /* enum perf_branch_sample_type */ + + /* + * Defines set of user regs to dump on samples. + * See asm/perf_regs.h for details. + */ + __u64 sample_regs_user; + + /* + * Defines size of the user stack to dump on samples. + */ + __u32 sample_stack_user; + + /* Align to u64. 
*/ + __u32 __reserved_2; }; /* @@ -548,6 +578,13 @@ enum perf_event_type { * char data[size];}&& PERF_SAMPLE_RAW * * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * }; */ PERF_RECORD_SAMPLE = 9, @@ -609,6 +646,7 @@ struct perf_guest_info_callbacks { #include <linux/static_key.h> #include <linux/atomic.h> #include <linux/sysfs.h> +#include <linux/perf_regs.h> #include <asm/local.h> struct perf_callchain_entry { @@ -654,6 +692,11 @@ struct perf_branch_stack { struct perf_branch_entry entries[0]; }; +struct perf_regs_user { + __u64 abi; + struct pt_regs *regs; +}; + struct task_struct; /* @@ -1133,6 +1176,8 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; struct perf_raw_record *raw; struct perf_branch_stack *br_stack; + struct perf_regs_user regs_user; + u64 stack_user_size; }; static inline void perf_sample_data_init(struct perf_sample_data *data, @@ -1142,7 +1187,10 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->addr = addr; data->raw = NULL; data->br_stack = NULL; - data->period = period; + data->period = period; + data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE; + data->regs_user.regs = NULL; + data->stack_user_size = 0; } extern void perf_output_sample(struct perf_output_handle *handle, @@ -1290,8 +1338,10 @@ static inline bool has_branch_stack(struct perf_event *event) extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); -extern void perf_output_copy(struct perf_output_handle *handle, +extern unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); +extern unsigned int perf_output_skip(struct perf_output_handle *handle, + unsigned int len); 
extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern void perf_event_enable(struct perf_event *event); diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h new file mode 100644 index 000000000000..3c73d5fe18be --- /dev/null +++ b/include/linux/perf_regs.h @@ -0,0 +1,25 @@ +#ifndef _LINUX_PERF_REGS_H +#define _LINUX_PERF_REGS_H + +#ifdef CONFIG_HAVE_PERF_REGS +#include <asm/perf_regs.h> +u64 perf_reg_value(struct pt_regs *regs, int idx); +int perf_reg_validate(u64 mask); +u64 perf_reg_abi(struct task_struct *task); +#else +static inline u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + return 0; +} + +static inline int perf_reg_validate(u64 mask) +{ + return mask ? -ENOSYS : 0; +} + +static inline u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_NONE; +} +#endif /* CONFIG_HAVE_PERF_REGS */ +#endif /* _LINUX_PERF_REGS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index b8c86648a2f9..3667c332e61d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -446,6 +446,8 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ #define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ +#define MMF_HAS_UPROBES 19 /* might have uprobes */ + #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) struct sighand_struct { diff --git a/include/linux/time.h b/include/linux/time.h index c81c5e40fcb5..b0bbd8f0130d 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -107,11 +107,29 @@ static inline struct timespec timespec_sub(struct timespec lhs, return ts_delta; } +#define KTIME_MAX ((s64)~((u64)1 << 63)) +#if (BITS_PER_LONG == 64) +# define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) +#else +# define KTIME_SEC_MAX LONG_MAX +#endif + /* * Returns true if the timespec is norm, false if denorm: */ -#define timespec_valid(ts) \ - (((ts)->tv_sec >= 0) && 
(((unsigned long) (ts)->tv_nsec) < NSEC_PER_SEC)) +static inline bool timespec_valid(const struct timespec *ts) +{ + /* Dates before 1970 are bogus */ + if (ts->tv_sec < 0) + return false; + /* Can't have more nanoseconds than a second */ + if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) + return false; + /* Disallow values that could overflow ktime_t */ + if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) + return false; + return true; +} extern void read_persistent_clock(struct timespec *ts); extern void read_boot_clock(struct timespec *ts); diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index efe4b3308c74..6d4fe79a1a6a 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -99,15 +99,16 @@ struct xol_area { struct uprobes_state { struct xol_area *xol_area; - atomic_t count; }; + extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); -extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify); +extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr); extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); extern int uprobe_mmap(struct vm_area_struct *vma); extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm); extern void uprobe_free_utask(struct task_struct *t); extern void uprobe_copy_process(struct task_struct *t); extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); @@ -117,7 +118,6 @@ extern void uprobe_notify_resume(struct pt_regs *regs); extern bool uprobe_deny_signal(void); extern bool __weak arch_uprobe_skip_sstep(struct arch_uprobe *aup, struct pt_regs *regs);
extern void uprobe_clear_state(struct mm_struct *mm); -extern void uprobe_reset_state(struct mm_struct *mm); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; @@ -138,6 +138,10 @@ static inline void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { } +static inline void +uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) +{ +} static inline void uprobe_notify_resume(struct pt_regs *regs) { } @@ -158,8 +162,5 @@ static inline void uprobe_copy_process(struct task_struct *t) static inline void uprobe_clear_state(struct mm_struct *mm) { } -static inline void uprobe_reset_state(struct mm_struct *mm) -{ -} #endif /* !CONFIG_UPROBES */ #endif /* _LINUX_UPROBES_H */ diff --git a/include/xen/events.h b/include/xen/events.h index 9c641deb65d2..04399b28e821 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -58,8 +58,6 @@ void notify_remote_via_irq(int irq); void xen_irq_resume(void); -void xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn); - /* Clear an irq's pending state, in preparation for polling on it */ void xen_clear_irq_pending(int irq); void xen_set_irq_pending(int irq); diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 98d4597f43d6..c77206184b8b 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -159,6 +159,11 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) int rctx; struct perf_callchain_entry *entry; + int kernel = !event->attr.exclude_callchain_kernel; + int user = !event->attr.exclude_callchain_user; + + if (!kernel && !user) + return NULL; entry = get_callchain_entry(&rctx); if (rctx == -1) @@ -169,24 +174,29 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs) entry->nr = 0; - if (!user_mode(regs)) { + if (kernel && !user_mode(regs)) { perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_kernel(entry, regs); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; } - if 
(regs) { - /* - * Disallow cross-task user callchains. - */ - if (event->ctx->task && event->ctx->task != current) - goto exit_put; - - perf_callchain_store(entry, PERF_CONTEXT_USER); - perf_callchain_user(entry, regs); + if (user) { + if (!user_mode(regs)) { + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + /* + * Disallow cross-task user callchains. + */ + if (event->ctx->task && event->ctx->task != current) + goto exit_put; + + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_user(entry, regs); + } } exit_put: diff --git a/kernel/events/core.c b/kernel/events/core.c index b7935fcec7d9..2ba890450d15 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -36,6 +36,7 @@ #include <linux/perf_event.h> #include <linux/ftrace_event.h> #include <linux/hw_breakpoint.h> +#include <linux/mm_types.h> #include "internal.h" @@ -3756,6 +3757,132 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) } EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); +static void +perf_output_sample_regs(struct perf_output_handle *handle, + struct pt_regs *regs, u64 mask) +{ + int bit; + + for_each_set_bit(bit, (const unsigned long *) &mask, + sizeof(mask) * BITS_PER_BYTE) { + u64 val; + + val = perf_reg_value(regs, bit); + perf_output_put(handle, val); + } +} + +static void perf_sample_regs_user(struct perf_regs_user *regs_user, + struct pt_regs *regs) +{ + if (!user_mode(regs)) { + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + regs_user->regs = regs; + regs_user->abi = perf_reg_abi(current); + } +} + +/* + * Get remaining task size from user stack pointer. + * + * It'd be better to take stack vma map and limit this more + * precisely, but there's no way to get it safely under interrupt, + * so using TASK_SIZE as limit.
+ */ +static u64 perf_ustack_task_size(struct pt_regs *regs) +{ + unsigned long addr = perf_user_stack_pointer(regs); + + if (!addr || addr >= TASK_SIZE) + return 0; + + return TASK_SIZE - addr; +} + +static u16 +perf_sample_ustack_size(u16 stack_size, u16 header_size, + struct pt_regs *regs) +{ + u64 task_size; + + /* No regs, no stack pointer, no dump. */ + if (!regs) + return 0; + + /* + * Check if we fit in with the requested stack size into the: + * - TASK_SIZE + * If we don't, we limit the size to the TASK_SIZE. + * + * - remaining sample size + * If we don't, we customize the stack size to + * fit in to the remaining sample size. + */ + + task_size = min((u64) USHRT_MAX, perf_ustack_task_size(regs)); + stack_size = min(stack_size, (u16) task_size); + + /* Current header size plus static size and dynamic size. */ + header_size += 2 * sizeof(u64); + + /* Do we fit in with the current stack dump size? */ + if ((u16) (header_size + stack_size) < header_size) { + /* + * If we overflow the maximum size for the sample, + * we customize the stack dump size to fit in. + */ + stack_size = USHRT_MAX - header_size - sizeof(u64); + stack_size = round_up(stack_size, sizeof(u64)); + } + + return stack_size; +} + +static void +perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size, + struct pt_regs *regs) +{ + /* Case of a kernel thread, nothing to dump */ + if (!regs) { + u64 size = 0; + perf_output_put(handle, size); + } else { + unsigned long sp; + unsigned int rem; + u64 dyn_size; + + /* + * We dump: + * static size + * - the size requested by user or the best one we can fit + * in to the sample max size + * data + * - user stack dump data + * dynamic size + * - the actual dumped size + */ + + /* Static size. */ + perf_output_put(handle, dump_size); + + /* Data. */ + sp = perf_user_stack_pointer(regs); + rem = __output_copy_user(handle, (void *) sp, dump_size); + dyn_size = dump_size - rem; + + perf_output_skip(handle, rem); + + /* Dynamic size. 
*/ + perf_output_put(handle, dyn_size); + } +} + static void __perf_event_header__init_id(struct perf_event_header *header, struct perf_sample_data *data, struct perf_event *event) @@ -4016,6 +4143,28 @@ void perf_output_sample(struct perf_output_handle *handle, perf_output_put(handle, nr); } } + + if (sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi = data->regs_user.abi; + + /* + * If there are no regs to dump, notice it through + * first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE). + */ + perf_output_put(handle, abi); + + if (abi) { + u64 mask = event->attr.sample_regs_user; + perf_output_sample_regs(handle, + data->regs_user.regs, + mask); + } + } + + if (sample_type & PERF_SAMPLE_STACK_USER) + perf_output_sample_ustack(handle, + data->stack_user_size, + data->regs_user.regs); } void perf_prepare_sample(struct perf_event_header *header, @@ -4067,6 +4216,49 @@ void perf_prepare_sample(struct perf_event_header *header, } header->size += size; } + + if (sample_type & PERF_SAMPLE_REGS_USER) { + /* regs dump ABI info */ + int size = sizeof(u64); + + perf_sample_regs_user(&data->regs_user, regs); + + if (data->regs_user.regs) { + u64 mask = event->attr.sample_regs_user; + size += hweight64(mask) * sizeof(u64); + } + + header->size += size; + } + + if (sample_type & PERF_SAMPLE_STACK_USER) { + /* + * Either we need PERF_SAMPLE_STACK_USER bit to be always + * processed as the last one or have additional check added + * in case new sample type is added, because we could eat + * up the rest of the sample size. + */ + struct perf_regs_user *uregs = &data->regs_user; + u16 stack_size = event->attr.sample_stack_user; + u16 size = sizeof(u64); + + if (!uregs->abi) + perf_sample_regs_user(uregs, regs); + + stack_size = perf_sample_ustack_size(stack_size, header->size, + uregs->regs); + + /* + * If there is something to dump, add space for the dump + * itself and for the field that tells the dynamic size, + * which is how many have been actually dumped.
+ */ + if (stack_size) + size += sizeof(u64) + stack_size; + + data->stack_user_size = stack_size; + header->size += size; + } } static void perf_event_output(struct perf_event *event, @@ -6142,6 +6334,28 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, attr->branch_sample_type = mask; } } + + if (attr->sample_type & PERF_SAMPLE_REGS_USER) { + ret = perf_reg_validate(attr->sample_regs_user); + if (ret) + return ret; + } + + if (attr->sample_type & PERF_SAMPLE_STACK_USER) { + if (!arch_perf_have_user_stack_dump()) + return -ENOSYS; + + /* + * We have __u32 type for the size, but so far + * we can only use __u16 as maximum due to the + * __u16 sample size limit. + */ + if (attr->sample_stack_user >= USHRT_MAX) + ret = -EINVAL; + else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) + ret = -EINVAL; + } + out: return ret; diff --git a/kernel/events/internal.h b/kernel/events/internal.h index a096c19f2c2a..d56a64c99a8b 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -2,6 +2,7 @@ #define _KERNEL_EVENTS_INTERNAL_H #include <linux/hardirq.h> +#include <linux/uaccess.h> /* Buffer handling */ @@ -76,30 +77,53 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb) return rb->nr_pages << (PAGE_SHIFT + page_order(rb)); } -static inline void -__output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len) +#define DEFINE_OUTPUT_COPY(func_name, memcpy_func) \ +static inline unsigned int \ +func_name(struct perf_output_handle *handle, \ + const void *buf, unsigned int len) \ +{ \ + unsigned long size, written; \ + \ + do { \ + size = min_t(unsigned long, handle->size, len); \ + \ + written = memcpy_func(handle->addr, buf, size); \ + \ + len -= written; \ + handle->addr += written; \ + buf += written; \ + handle->size -= written; \ + if (!handle->size) { \ + struct ring_buffer *rb = handle->rb; \ + \ + handle->page++; \ + handle->page &= rb->nr_pages - 1; \ + handle->addr = 
rb->data_pages[handle->page]; \ + handle->size = PAGE_SIZE << page_order(rb); \ + } \ + } while (len && written == size); \ + \ + return len; \ +} + +static inline int memcpy_common(void *dst, const void *src, size_t n) { - do { - unsigned long size = min_t(unsigned long, handle->size, len); - - memcpy(handle->addr, buf, size); - - len -= size; - handle->addr += size; - buf += size; - handle->size -= size; - if (!handle->size) { - struct ring_buffer *rb = handle->rb; - - handle->page++; - handle->page &= rb->nr_pages - 1; - handle->addr = rb->data_pages[handle->page]; - handle->size = PAGE_SIZE << page_order(rb); - } - } while (len); + memcpy(dst, src, n); + return n; } +DEFINE_OUTPUT_COPY(__output_copy, memcpy_common) + +#define MEMCPY_SKIP(dst, src, n) (n) + +DEFINE_OUTPUT_COPY(__output_skip, MEMCPY_SKIP) + +#ifndef arch_perf_out_copy_user +#define arch_perf_out_copy_user __copy_from_user_inatomic +#endif + +DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) + /* Callchain handling */ extern struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs); @@ -134,4 +158,20 @@ static inline void put_recursion_context(int *recursion, int rctx) recursion[rctx]--; } +#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP +static inline bool arch_perf_have_user_stack_dump(void) +{ + return true; +} + +#define perf_user_stack_pointer(regs) user_stack_pointer(regs) +#else +static inline bool arch_perf_have_user_stack_dump(void) +{ + return false; +} + +#define perf_user_stack_pointer(regs) 0 +#endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */ + #endif /* _KERNEL_EVENTS_INTERNAL_H */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 6ddaba43fb7a..23cb34ff3973 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -182,10 +182,16 @@ out: return -ENOSPC; } -void perf_output_copy(struct perf_output_handle *handle, +unsigned int perf_output_copy(struct perf_output_handle *handle, const void *buf, 
unsigned int len) { - __output_copy(handle, buf, len); + return __output_copy(handle, buf, len); +} + +unsigned int perf_output_skip(struct perf_output_handle *handle, + unsigned int len) +{ + return __output_skip(handle, NULL, len); } void perf_output_end(struct perf_output_handle *handle) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index c08a22d02f72..1666632e6edf 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -280,12 +280,10 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_ if (ret <= 0) return ret; - lock_page(page); vaddr_new = kmap_atomic(page); vaddr &= ~PAGE_MASK; memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE); kunmap_atomic(vaddr_new); - unlock_page(page); put_page(page); @@ -334,7 +332,7 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned */ result = is_swbp_at_addr(mm, vaddr); if (result == 1) - return -EEXIST; + return 0; if (result) return result; @@ -347,24 +345,22 @@ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned * @mm: the probed process address space. * @auprobe: arch specific probepoint information. * @vaddr: the virtual address to insert the opcode. - * @verify: if true, verify existance of breakpoint instruction. * * For mm @mm, restore the original opcode (opcode) at @vaddr. * Return 0 (success) or a negative errno. 
*/ int __weak -set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, bool verify) +set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr) { - if (verify) { - int result; + int result; - result = is_swbp_at_addr(mm, vaddr); - if (!result) - return -EINVAL; + result = is_swbp_at_addr(mm, vaddr); + if (!result) + return -EINVAL; + + if (result != 1) + return result; - if (result != 1) - return result; - } return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn); } @@ -649,6 +645,7 @@ static int install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long vaddr) { + bool first_uprobe; int ret; /* @@ -659,7 +656,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, * Hence behave as if probe already existed. */ if (!uprobe->consumers) - return -EEXIST; + return 0; if (!(uprobe->flags & UPROBE_COPY_INSN)) { ret = copy_insn(uprobe, vma->vm_file); @@ -681,17 +678,16 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, } /* - * Ideally, should be updating the probe count after the breakpoint - * has been successfully inserted. However a thread could hit the - * breakpoint we just inserted even before the probe count is - * incremented. If this is the first breakpoint placed, breakpoint - * notifier might ignore uprobes and pass the trap to the thread. - * Hence increment before and decrement on failure. + * set MMF_HAS_UPROBES in advance for uprobe_pre_sstep_notifier(), + * the task can hit this breakpoint right after __replace_page(). 
*/ - atomic_inc(&mm->uprobes_state.count); + first_uprobe = !test_bit(MMF_HAS_UPROBES, &mm->flags); + if (first_uprobe) + set_bit(MMF_HAS_UPROBES, &mm->flags); + ret = set_swbp(&uprobe->arch, mm, vaddr); - if (ret) - atomic_dec(&mm->uprobes_state.count); + if (ret && first_uprobe) + clear_bit(MMF_HAS_UPROBES, &mm->flags); return ret; } @@ -699,8 +695,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, static void remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr) { - if (!set_orig_insn(&uprobe->arch, mm, vaddr, true)) - atomic_dec(&mm->uprobes_state.count); + set_orig_insn(&uprobe->arch, mm, vaddr); } /* @@ -831,17 +826,11 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register) vaddr_to_offset(vma, info->vaddr) != uprobe->offset) goto unlock; - if (is_register) { + if (is_register) err = install_breakpoint(uprobe, mm, vma, info->vaddr); - /* - * We can race against uprobe_mmap(), see the - * comment near uprobe_hash(). - */ - if (err == -EEXIST) - err = 0; - } else { + else remove_breakpoint(uprobe, mm, info->vaddr); - } + unlock: up_write(&mm->mmap_sem); free: @@ -1008,23 +997,16 @@ static void build_probe_list(struct inode *inode, } /* - * Called from mmap_region. - * called with mm->mmap_sem acquired. - * - * Return -ve no if we fail to insert probes and we cannot - * bail-out. - * Return 0 otherwise. i.e: + * Called from mmap_region/vma_adjust with mm->mmap_sem acquired. * - * - successful insertion of probes - * - (or) no possible probes to be inserted. - * - (or) insertion of probes failed but we can bail-out. + * Currently we ignore all errors and always return 0, the callers + * can't handle the failure anyway. 
*/ int uprobe_mmap(struct vm_area_struct *vma) { struct list_head tmp_list; struct uprobe *uprobe, *u; struct inode *inode; - int ret, count; if (!atomic_read(&uprobe_events) || !valid_vma(vma, true)) return 0; @@ -1036,44 +1018,16 @@ int uprobe_mmap(struct vm_area_struct *vma) mutex_lock(uprobes_mmap_hash(inode)); build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list); - ret = 0; - count = 0; - list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { - if (!ret) { + if (!fatal_signal_pending(current)) { unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); - - ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); - /* - * We can race against uprobe_register(), see the - * comment near uprobe_hash(). - */ - if (ret == -EEXIST) { - ret = 0; - - if (!is_swbp_at_addr(vma->vm_mm, vaddr)) - continue; - - /* - * Unable to insert a breakpoint, but - * breakpoint lies underneath. Increment the - * probe count. - */ - atomic_inc(&vma->vm_mm->uprobes_state.count); - } - - if (!ret) - count++; + install_breakpoint(uprobe, vma->vm_mm, vma, vaddr); } put_uprobe(uprobe); } - mutex_unlock(uprobes_mmap_hash(inode)); - if (ret) - atomic_sub(count, &vma->vm_mm->uprobes_state.count); - - return ret; + return 0; } /* @@ -1081,37 +1035,16 @@ int uprobe_mmap(struct vm_area_struct *vma) */ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - struct list_head tmp_list; - struct uprobe *uprobe, *u; - struct inode *inode; - if (!atomic_read(&uprobe_events) || !valid_vma(vma, false)) return; if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? 
*/ return; - if (!atomic_read(&vma->vm_mm->uprobes_state.count)) - return; - - inode = vma->vm_file->f_mapping->host; - if (!inode) + if (!test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags)) return; - mutex_lock(uprobes_mmap_hash(inode)); - build_probe_list(inode, vma, start, end, &tmp_list); - - list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) { - unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset); - /* - * An unregister could have removed the probe before - * unmap. So check before we decrement the count. - */ - if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1) - atomic_dec(&vma->vm_mm->uprobes_state.count); - put_uprobe(uprobe); - } - mutex_unlock(uprobes_mmap_hash(inode)); + /* TODO: unmapping uprobe(s) will need more work */ } /* Slot allocation for XOL */ @@ -1213,13 +1146,12 @@ void uprobe_clear_state(struct mm_struct *mm) kfree(area); } -/* - * uprobe_reset_state - Free the area allocated for slots. - */ -void uprobe_reset_state(struct mm_struct *mm) +void uprobe_dup_mmap(struct mm_struct *oldmm, struct mm_struct *newmm) { - mm->uprobes_state.xol_area = NULL; - atomic_set(&mm->uprobes_state.count, 0); + newmm->uprobes_state.xol_area = NULL; + + if (test_bit(MMF_HAS_UPROBES, &oldmm->flags)) + set_bit(MMF_HAS_UPROBES, &newmm->flags); } /* @@ -1518,17 +1450,15 @@ cleanup_ret: utask->active_uprobe = NULL; utask->state = UTASK_RUNNING; } - if (uprobe) { - if (!(uprobe->flags & UPROBE_SKIP_SSTEP)) + if (!(uprobe->flags & UPROBE_SKIP_SSTEP)) - /* - * cannot singlestep; cannot skip instruction; - * re-execute the instruction. - */ - instruction_pointer_set(regs, bp_vaddr); + /* + * cannot singlestep; cannot skip instruction; + * re-execute the instruction. 
+ */ + instruction_pointer_set(regs, bp_vaddr); - put_uprobe(uprobe); - } + put_uprobe(uprobe); } /* @@ -1589,8 +1519,7 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs) { struct uprobe_task *utask; - if (!current->mm || !atomic_read(&current->mm->uprobes_state.count)) - /* task is currently not uprobed */ + if (!current->mm || !test_bit(MMF_HAS_UPROBES, &current->mm->flags)) return 0; utask = current->utask; diff --git a/kernel/fork.c b/kernel/fork.c index 3bd2280d79f6..2343c9eaaaf4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -353,6 +353,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) down_write(&oldmm->mmap_sem); flush_cache_dup_mm(oldmm); + uprobe_dup_mmap(oldmm, mm); /* * Not linked in yet - no deadlock potential: */ @@ -454,9 +455,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (retval) goto out; - - if (file && uprobe_mmap(tmp)) - goto out; } /* a new mm has just been created */ arch_dup_mmap(oldmm, mm); @@ -839,8 +837,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk) #ifdef CONFIG_TRANSPARENT_HUGEPAGE mm->pmd_huge_pte = NULL; #endif - uprobe_reset_state(mm); - if (!mm_init(mm, tsk)) goto fail_nomem; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index c62b8546cc90..35b4315d84f5 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -561,9 +561,9 @@ static __kprobes void kprobe_optimizer(struct work_struct *work) { LIST_HEAD(free_list); + mutex_lock(&kprobe_mutex); /* Lock modules while optimizing kprobes */ mutex_lock(&module_mutex); - mutex_lock(&kprobe_mutex); /* * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed) @@ -586,8 +586,8 @@ static __kprobes void kprobe_optimizer(struct work_struct *work) /* Step 4: Free cleaned kprobes after quiesence period */ do_free_cleaned_kprobes(&free_list); - mutex_unlock(&kprobe_mutex); mutex_unlock(&module_mutex); + mutex_unlock(&kprobe_mutex); /* Step 5: Kick optimizer again if needed */ if (!list_empty(&optimizing_list) || 
!list_empty(&unoptimizing_list)) @@ -759,20 +759,32 @@ static __kprobes void try_to_optimize_kprobe(struct kprobe *p) struct kprobe *ap; struct optimized_kprobe *op; + /* Impossible to optimize ftrace-based kprobe */ + if (kprobe_ftrace(p)) + return; + + /* For preparing optimization, jump_label_text_reserved() is called */ + jump_label_lock(); + mutex_lock(&text_mutex); + ap = alloc_aggr_kprobe(p); if (!ap) - return; + goto out; op = container_of(ap, struct optimized_kprobe, kp); if (!arch_prepared_optinsn(&op->optinsn)) { /* If failed to setup optimizing, fallback to kprobe */ arch_remove_optimized_kprobe(op); kfree(op); - return; + goto out; } init_aggr_kprobe(ap, p); - optimize_kprobe(ap); + optimize_kprobe(ap); /* This just kicks optimizer thread */ + +out: + mutex_unlock(&text_mutex); + jump_label_unlock(); } #ifdef CONFIG_SYSCTL @@ -907,9 +919,64 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) } #endif /* CONFIG_OPTPROBES */ +#ifdef KPROBES_CAN_USE_FTRACE +static struct ftrace_ops kprobe_ftrace_ops __read_mostly = { + .func = kprobe_ftrace_handler, + .flags = FTRACE_OPS_FL_SAVE_REGS, +}; +static int kprobe_ftrace_enabled; + +/* Must ensure p->addr is really on ftrace */ +static int __kprobes prepare_kprobe(struct kprobe *p) +{ + if (!kprobe_ftrace(p)) + return arch_prepare_kprobe(p); + + return arch_prepare_kprobe_ftrace(p); +} + +/* Caller must lock kprobe_mutex */ +static void __kprobes arm_kprobe_ftrace(struct kprobe *p) +{ + int ret; + + ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, + (unsigned long)p->addr, 0, 0); + WARN(ret < 0, "Failed to arm kprobe-ftrace at %p (%d)\n", p->addr, ret); + kprobe_ftrace_enabled++; + if (kprobe_ftrace_enabled == 1) { + ret = register_ftrace_function(&kprobe_ftrace_ops); + WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret); + } +} + +/* Caller must lock kprobe_mutex */ +static void __kprobes disarm_kprobe_ftrace(struct kprobe *p) +{ + int ret; + + kprobe_ftrace_enabled--; + if 
(kprobe_ftrace_enabled == 0) { + ret = unregister_ftrace_function(&kprobe_ftrace_ops); + WARN(ret < 0, "Failed to init kprobe-ftrace (%d)\n", ret); + } + ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, + (unsigned long)p->addr, 1, 0); + WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret); +} +#else /* !KPROBES_CAN_USE_FTRACE */ +#define prepare_kprobe(p) arch_prepare_kprobe(p) +#define arm_kprobe_ftrace(p) do {} while (0) +#define disarm_kprobe_ftrace(p) do {} while (0) +#endif + /* Arm a kprobe with text_mutex */ static void __kprobes arm_kprobe(struct kprobe *kp) { + if (unlikely(kprobe_ftrace(kp))) { + arm_kprobe_ftrace(kp); + return; + } /* * Here, since __arm_kprobe() doesn't use stop_machine(), * this doesn't cause deadlock on text_mutex. So, we don't @@ -921,11 +988,15 @@ static void __kprobes arm_kprobe(struct kprobe *kp) } /* Disarm a kprobe with text_mutex */ -static void __kprobes disarm_kprobe(struct kprobe *kp) +static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt) { + if (unlikely(kprobe_ftrace(kp))) { + disarm_kprobe_ftrace(kp); + return; + } /* Ditto */ mutex_lock(&text_mutex); - __disarm_kprobe(kp, true); + __disarm_kprobe(kp, reopt); mutex_unlock(&text_mutex); } @@ -1144,12 +1215,6 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) if (p->post_handler && !ap->post_handler) ap->post_handler = aggr_post_handler; - if (kprobe_disabled(ap) && !kprobe_disabled(p)) { - ap->flags &= ~KPROBE_FLAG_DISABLED; - if (!kprobes_all_disarmed) - /* Arm the breakpoint again. 
*/ - __arm_kprobe(ap); - } return 0; } @@ -1189,11 +1254,22 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, int ret = 0; struct kprobe *ap = orig_p; + /* For preparing optimization, jump_label_text_reserved() is called */ + jump_label_lock(); + /* + * Get online CPUs to avoid text_mutex deadlock.with stop machine, + * which is invoked by unoptimize_kprobe() in add_new_kprobe() + */ + get_online_cpus(); + mutex_lock(&text_mutex); + if (!kprobe_aggrprobe(orig_p)) { /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */ ap = alloc_aggr_kprobe(orig_p); - if (!ap) - return -ENOMEM; + if (!ap) { + ret = -ENOMEM; + goto out; + } init_aggr_kprobe(ap, orig_p); } else if (kprobe_unused(ap)) /* This probe is going to die. Rescue it */ @@ -1213,7 +1289,7 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, * free aggr_probe. It will be used next time, or * freed by unregister_kprobe. */ - return ret; + goto out; /* Prepare optimized instructions if possible. */ prepare_optimized_kprobe(ap); @@ -1228,7 +1304,20 @@ static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, /* Copy ap's insn slot to p */ copy_kprobe(ap, p); - return add_new_kprobe(ap, p); + ret = add_new_kprobe(ap, p); + +out: + mutex_unlock(&text_mutex); + put_online_cpus(); + jump_label_unlock(); + + if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) { + ap->flags &= ~KPROBE_FLAG_DISABLED; + if (!kprobes_all_disarmed) + /* Arm the breakpoint again. 
*/ + arm_kprobe(ap); + } + return ret; } static int __kprobes in_kprobes_functions(unsigned long addr) @@ -1313,71 +1402,99 @@ static inline int check_kprobe_rereg(struct kprobe *p) return ret; } -int __kprobes register_kprobe(struct kprobe *p) +static __kprobes int check_kprobe_address_safe(struct kprobe *p, + struct module **probed_mod) { int ret = 0; - struct kprobe *old_p; - struct module *probed_mod; - kprobe_opcode_t *addr; - - addr = kprobe_addr(p); - if (IS_ERR(addr)) - return PTR_ERR(addr); - p->addr = addr; + unsigned long ftrace_addr; - ret = check_kprobe_rereg(p); - if (ret) - return ret; + /* + * If the address is located on a ftrace nop, set the + * breakpoint to the following instruction. + */ + ftrace_addr = ftrace_location((unsigned long)p->addr); + if (ftrace_addr) { +#ifdef KPROBES_CAN_USE_FTRACE + /* Given address is not on the instruction boundary */ + if ((unsigned long)p->addr != ftrace_addr) + return -EILSEQ; + /* break_handler (jprobe) can not work with ftrace */ + if (p->break_handler) + return -EINVAL; + p->flags |= KPROBE_FLAG_FTRACE; +#else /* !KPROBES_CAN_USE_FTRACE */ + return -EINVAL; +#endif + } jump_label_lock(); preempt_disable(); + + /* Ensure it is not in reserved area nor out of text */ if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr) || - ftrace_text_reserved(p->addr, p->addr) || jump_label_text_reserved(p->addr, p->addr)) { ret = -EINVAL; - goto cannot_probe; + goto out; } - /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ - p->flags &= KPROBE_FLAG_DISABLED; - - /* - * Check if are we probing a module. - */ - probed_mod = __module_text_address((unsigned long) p->addr); - if (probed_mod) { - /* Return -ENOENT if fail. 
*/ - ret = -ENOENT; + /* Check if are we probing a module */ + *probed_mod = __module_text_address((unsigned long) p->addr); + if (*probed_mod) { /* * We must hold a refcount of the probed module while updating * its code to prohibit unexpected unloading. */ - if (unlikely(!try_module_get(probed_mod))) - goto cannot_probe; + if (unlikely(!try_module_get(*probed_mod))) { + ret = -ENOENT; + goto out; + } /* * If the module freed .init.text, we couldn't insert * kprobes in there. */ - if (within_module_init((unsigned long)p->addr, probed_mod) && - probed_mod->state != MODULE_STATE_COMING) { - module_put(probed_mod); - goto cannot_probe; + if (within_module_init((unsigned long)p->addr, *probed_mod) && + (*probed_mod)->state != MODULE_STATE_COMING) { + module_put(*probed_mod); + *probed_mod = NULL; + ret = -ENOENT; } - /* ret will be updated by following code */ } +out: preempt_enable(); jump_label_unlock(); + return ret; +} + +int __kprobes register_kprobe(struct kprobe *p) +{ + int ret; + struct kprobe *old_p; + struct module *probed_mod; + kprobe_opcode_t *addr; + + /* Adjust probe address from symbol */ + addr = kprobe_addr(p); + if (IS_ERR(addr)) + return PTR_ERR(addr); + p->addr = addr; + + ret = check_kprobe_rereg(p); + if (ret) + return ret; + + /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ + p->flags &= KPROBE_FLAG_DISABLED; p->nmissed = 0; INIT_LIST_HEAD(&p->list); - mutex_lock(&kprobe_mutex); - jump_label_lock(); /* needed to call jump_label_text_reserved() */ + ret = check_kprobe_address_safe(p, &probed_mod); + if (ret) + return ret; - get_online_cpus(); /* For avoiding text_mutex deadlock. 
*/ - mutex_lock(&text_mutex); + mutex_lock(&kprobe_mutex); old_p = get_kprobe(p->addr); if (old_p) { @@ -1386,7 +1503,9 @@ int __kprobes register_kprobe(struct kprobe *p) goto out; } - ret = arch_prepare_kprobe(p); + mutex_lock(&text_mutex); /* Avoiding text modification */ + ret = prepare_kprobe(p); + mutex_unlock(&text_mutex); if (ret) goto out; @@ -1395,26 +1514,18 @@ int __kprobes register_kprobe(struct kprobe *p) &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); if (!kprobes_all_disarmed && !kprobe_disabled(p)) - __arm_kprobe(p); + arm_kprobe(p); /* Try to optimize kprobe */ try_to_optimize_kprobe(p); out: - mutex_unlock(&text_mutex); - put_online_cpus(); - jump_label_unlock(); mutex_unlock(&kprobe_mutex); if (probed_mod) module_put(probed_mod); return ret; - -cannot_probe: - preempt_enable(); - jump_label_unlock(); - return ret; } EXPORT_SYMBOL_GPL(register_kprobe); @@ -1451,7 +1562,7 @@ static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p) /* Try to disarm and disable this/parent probe */ if (p == orig_p || aggr_kprobe_disabled(orig_p)) { - disarm_kprobe(orig_p); + disarm_kprobe(orig_p, true); orig_p->flags |= KPROBE_FLAG_DISABLED; } } @@ -2049,10 +2160,11 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, if (!pp) pp = p; - seq_printf(pi, "%s%s%s\n", + seq_printf(pi, "%s%s%s%s\n", (kprobe_gone(p) ? "[GONE]" : ""), ((kprobe_disabled(p) && !kprobe_gone(p)) ? "[DISABLED]" : ""), - (kprobe_optimized(pp) ? "[OPTIMIZED]" : "")); + (kprobe_optimized(pp) ? "[OPTIMIZED]" : ""), + (kprobe_ftrace(pp) ? 
"[FTRACE]" : "")); } static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) @@ -2131,14 +2243,12 @@ static void __kprobes arm_all_kprobes(void) goto already_enabled; /* Arming kprobes doesn't optimize kprobe itself */ - mutex_lock(&text_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) if (!kprobe_disabled(p)) - __arm_kprobe(p); + arm_kprobe(p); } - mutex_unlock(&text_mutex); kprobes_all_disarmed = false; printk(KERN_INFO "Kprobes globally enabled\n"); @@ -2166,15 +2276,13 @@ static void __kprobes disarm_all_kprobes(void) kprobes_all_disarmed = true; printk(KERN_INFO "Kprobes globally disabled\n"); - mutex_lock(&text_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry_rcu(p, node, head, hlist) { if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) - __disarm_kprobe(p, false); + disarm_kprobe(p, false); } } - mutex_unlock(&text_mutex); mutex_unlock(&kprobe_mutex); /* Wait for disarming all kprobes by optimizer */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index e16af197a2bc..0c1485e42be6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -115,6 +115,7 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts) { tk->xtime_sec += ts->tv_sec; tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift; + tk_normalize_xtime(tk); } static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm) @@ -276,7 +277,7 @@ static void timekeeping_forward_now(struct timekeeper *tk) tk->xtime_nsec += cycle_delta * tk->mult; /* If arch requires, add in gettimeoffset() */ - tk->xtime_nsec += arch_gettimeoffset() << tk->shift; + tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift; tk_normalize_xtime(tk); @@ -427,7 +428,7 @@ int do_settimeofday(const struct timespec *tv) struct timespec ts_delta, xt; unsigned long flags; - if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) + if 
(!timespec_valid(tv)) return -EINVAL; write_seqlock_irqsave(&tk->lock, flags); @@ -463,6 +464,8 @@ int timekeeping_inject_offset(struct timespec *ts) { struct timekeeper *tk = &timekeeper; unsigned long flags; + struct timespec tmp; + int ret = 0; if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) return -EINVAL; @@ -471,10 +474,17 @@ int timekeeping_inject_offset(struct timespec *ts) timekeeping_forward_now(tk); + /* Make sure the proposed value is valid */ + tmp = timespec_add(tk_xtime(tk), *ts); + if (!timespec_valid(&tmp)) { + ret = -EINVAL; + goto error; + } tk_xtime_add(tk, ts); tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts)); +error: /* even if we error out, we forwarded the time, so call update */ timekeeping_update(tk, true); write_sequnlock_irqrestore(&tk->lock, flags); @@ -482,7 +492,7 @@ int timekeeping_inject_offset(struct timespec *ts) /* signal hrtimers about time change */ clock_was_set(); - return 0; + return ret; } EXPORT_SYMBOL(timekeeping_inject_offset); @@ -649,7 +659,20 @@ void __init timekeeping_init(void) struct timespec now, boot, tmp; read_persistent_clock(&now); + if (!timespec_valid(&now)) { + pr_warn("WARNING: Persistent clock returned invalid value!\n" + " Check your CMOS/BIOS settings.\n"); + now.tv_sec = 0; + now.tv_nsec = 0; + } + read_boot_clock(&boot); + if (!timespec_valid(&boot)) { + pr_warn("WARNING: Boot clock returned invalid value!\n" + " Check your CMOS/BIOS settings.\n"); + boot.tv_sec = 0; + boot.tv_nsec = 0; + } seqlock_init(&tk->lock); @@ -1129,6 +1152,10 @@ static void update_wall_time(void) offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #endif + /* Check if there's really nothing to do */ + if (offset < tk->cycle_interval) + goto out; + /* * With NO_HZ we may have to accumulate many cycle_intervals * (think "ticks") worth of time at once. 
To do this efficiently, @@ -1161,9 +1188,9 @@ static void update_wall_time(void) * the vsyscall implementations are converted to use xtime_nsec * (shifted nanoseconds), this can be killed. */ - remainder = tk->xtime_nsec & ((1 << tk->shift) - 1); + remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1); tk->xtime_nsec -= remainder; - tk->xtime_nsec += 1 << tk->shift; + tk->xtime_nsec += 1ULL << tk->shift; tk->ntp_error += remainder << tk->ntp_error_shift; /* diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 8c4c07071cc5..9301a0e35e0c 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS help See Documentation/trace/ftrace-design.txt +config HAVE_FENTRY + bool + help + Arch supports the gcc options -pg with -mfentry + config HAVE_C_RECORDMCOUNT bool help diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index b831087c8200..837090808aac 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -5,10 +5,12 @@ ifdef CONFIG_FUNCTION_TRACER ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) +ifdef CONFIG_FTRACE_SELFTEST # selftest needs instrumentation CFLAGS_trace_selftest_dynamic.o = -pg obj-y += trace_selftest_dynamic.o endif +endif # If unlikely tracing is enabled, do not trace these files ifdef CONFIG_TRACING_BRANCHES diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b4f20fba09fc..9dcf15d38380 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -64,12 +64,20 @@ #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL) +static struct ftrace_ops ftrace_list_end __read_mostly = { + .func = ftrace_stub, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, +}; + /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; static int last_ftrace_enabled; /* Quick disabling of function tracer. 
*/ -int function_trace_stop; +int function_trace_stop __read_mostly; + +/* Current function tracing op */ +struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; /* List for set_ftrace_pid's pids. */ LIST_HEAD(ftrace_pids); @@ -86,22 +94,43 @@ static int ftrace_disabled __read_mostly; static DEFINE_MUTEX(ftrace_lock); -static struct ftrace_ops ftrace_list_end __read_mostly = { - .func = ftrace_stub, -}; - static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub; -ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub; ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; static struct ftrace_ops control_ops; -static void -ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); +#if ARCH_SUPPORTS_FTRACE_OPS +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs); +#else +/* See comment below, where ftrace_ops_list_func is defined */ +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); +#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) +#endif + +/** + * ftrace_nr_registered_ops - return number of ops registered + * + * Returns the number of ftrace_ops registered and tracing functions + */ +int ftrace_nr_registered_ops(void) +{ + struct ftrace_ops *ops; + int cnt = 0; + + mutex_lock(&ftrace_lock); + + for (ops = ftrace_ops_list; + ops != &ftrace_list_end; ops = ops->next) + cnt++; + + mutex_unlock(&ftrace_lock); + + return cnt; +} /* * Traverse the ftrace_global_list, invoking all entries. 
The reason that we @@ -112,29 +141,29 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); * * Silly Alpha and silly pointer-speculation compiler optimizations! */ -static void ftrace_global_list_func(unsigned long ip, - unsigned long parent_ip) +static void +ftrace_global_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) { - struct ftrace_ops *op; - if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) return; trace_recursion_set(TRACE_GLOBAL_BIT); op = rcu_dereference_raw(ftrace_global_list); /*see above*/ while (op != &ftrace_list_end) { - op->func(ip, parent_ip); + op->func(ip, parent_ip, op, regs); op = rcu_dereference_raw(op->next); /*see above*/ }; trace_recursion_clear(TRACE_GLOBAL_BIT); } -static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) +static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) { if (!test_tsk_trace_trace(current)) return; - ftrace_pid_function(ip, parent_ip); + ftrace_pid_function(ip, parent_ip, op, regs); } static void set_ftrace_pid_function(ftrace_func_t func) @@ -153,25 +182,9 @@ static void set_ftrace_pid_function(ftrace_func_t func) void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; - __ftrace_trace_function = ftrace_stub; - __ftrace_trace_function_delay = ftrace_stub; ftrace_pid_function = ftrace_stub; } -#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST -/* - * For those archs that do not test ftrace_trace_stop in their - * mcount call site, we need to do it from C. 
- */ -static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip) -{ - if (function_trace_stop) - return; - - __ftrace_trace_function(ip, parent_ip); -} -#endif - static void control_ops_disable_all(struct ftrace_ops *ops) { int cpu; @@ -230,28 +243,27 @@ static void update_ftrace_function(void) /* * If we are at the end of the list and this ops is - * not dynamic, then have the mcount trampoline call - * the function directly + * recursion safe and not dynamic and the arch supports passing ops, + * then have the mcount trampoline call the function directly. */ if (ftrace_ops_list == &ftrace_list_end || (ftrace_ops_list->next == &ftrace_list_end && - !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC))) + !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) && + (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) && + !FTRACE_FORCE_LIST_FUNC)) { + /* Set the ftrace_ops that the arch callback uses */ + if (ftrace_ops_list == &global_ops) + function_trace_op = ftrace_global_list; + else + function_trace_op = ftrace_ops_list; func = ftrace_ops_list->func; - else + } else { + /* Just use the default ftrace_ops */ + function_trace_op = &ftrace_list_end; func = ftrace_ops_list_func; + } -#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST ftrace_trace_function = func; -#else -#ifdef CONFIG_DYNAMIC_FTRACE - /* do not update till all functions have been modified */ - __ftrace_trace_function_delay = func; -#else - __ftrace_trace_function = func; -#endif - ftrace_trace_function = - (func == ftrace_stub) ? func : ftrace_test_stop_func; -#endif } static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops) @@ -325,6 +337,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops) if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK) return -EINVAL; +#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS + /* + * If the ftrace_ops specifies SAVE_REGS, then it only can be used + * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set. 
+ * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant. + */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS && + !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)) + return -EINVAL; + + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED) + ops->flags |= FTRACE_OPS_FL_SAVE_REGS; +#endif + if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; @@ -773,7 +799,8 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) } static void -function_profile_call(unsigned long ip, unsigned long parent_ip) +function_profile_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) { struct ftrace_profile_stat *stat; struct ftrace_profile *rec; @@ -803,7 +830,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip) #ifdef CONFIG_FUNCTION_GRAPH_TRACER static int profile_graph_entry(struct ftrace_graph_ent *trace) { - function_profile_call(trace->func, 0); + function_profile_call(trace->func, 0, NULL, NULL); return 1; } @@ -863,6 +890,7 @@ static void unregister_ftrace_profiler(void) #else static struct ftrace_ops ftrace_profile_ops __read_mostly = { .func = function_profile_call, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static int register_ftrace_profiler(void) @@ -1045,6 +1073,7 @@ static struct ftrace_ops global_ops = { .func = ftrace_stub, .notrace_hash = EMPTY_HASH, .filter_hash = EMPTY_HASH, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static DEFINE_MUTEX(ftrace_regex_lock); @@ -1525,6 +1554,12 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, rec->flags++; if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX)) return; + /* + * If any ops wants regs saved for this function + * then all ops will get saved regs. 
+ */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) + rec->flags |= FTRACE_FL_REGS; } else { if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0)) return; @@ -1616,18 +1651,59 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update) if (enable && (rec->flags & ~FTRACE_FL_MASK)) flag = FTRACE_FL_ENABLED; + /* + * If enabling and the REGS flag does not match the REGS_EN, then + * do not ignore this record. Set flags to fail the compare against + * ENABLED. + */ + if (flag && + (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN))) + flag |= FTRACE_FL_REGS; + /* If the state of this record hasn't changed, then do nothing */ if ((rec->flags & FTRACE_FL_ENABLED) == flag) return FTRACE_UPDATE_IGNORE; if (flag) { - if (update) + /* Save off if rec is being enabled (for return value) */ + flag ^= rec->flags & FTRACE_FL_ENABLED; + + if (update) { rec->flags |= FTRACE_FL_ENABLED; - return FTRACE_UPDATE_MAKE_CALL; + if (flag & FTRACE_FL_REGS) { + if (rec->flags & FTRACE_FL_REGS) + rec->flags |= FTRACE_FL_REGS_EN; + else + rec->flags &= ~FTRACE_FL_REGS_EN; + } + } + + /* + * If this record is being updated from a nop, then + * return UPDATE_MAKE_CALL. + * Otherwise, if the EN flag is set, then return + * UPDATE_MODIFY_CALL_REGS to tell the caller to convert + * from the non-save regs, to a save regs function. + * Otherwise, + * return UPDATE_MODIFY_CALL to tell the caller to convert + * from the save regs, to a non-save regs function. 
+ */ + if (flag & FTRACE_FL_ENABLED) + return FTRACE_UPDATE_MAKE_CALL; + else if (rec->flags & FTRACE_FL_REGS_EN) + return FTRACE_UPDATE_MODIFY_CALL_REGS; + else + return FTRACE_UPDATE_MODIFY_CALL; } - if (update) - rec->flags &= ~FTRACE_FL_ENABLED; + if (update) { + /* If there's no more users, clear all flags */ + if (!(rec->flags & ~FTRACE_FL_MASK)) + rec->flags = 0; + else + /* Just disable the record (keep REGS state) */ + rec->flags &= ~FTRACE_FL_ENABLED; + } return FTRACE_UPDATE_MAKE_NOP; } @@ -1662,13 +1738,17 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable) static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable) { + unsigned long ftrace_old_addr; unsigned long ftrace_addr; int ret; - ftrace_addr = (unsigned long)FTRACE_ADDR; - ret = ftrace_update_record(rec, enable); + if (rec->flags & FTRACE_FL_REGS) + ftrace_addr = (unsigned long)FTRACE_REGS_ADDR; + else + ftrace_addr = (unsigned long)FTRACE_ADDR; + switch (ret) { case FTRACE_UPDATE_IGNORE: return 0; @@ -1678,6 +1758,15 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) case FTRACE_UPDATE_MAKE_NOP: return ftrace_make_nop(NULL, rec, ftrace_addr); + + case FTRACE_UPDATE_MODIFY_CALL_REGS: + case FTRACE_UPDATE_MODIFY_CALL: + if (rec->flags & FTRACE_FL_REGS) + ftrace_old_addr = (unsigned long)FTRACE_ADDR; + else + ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR; + + return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); } return -1; /* unknow ftrace bug */ @@ -1882,16 +1971,6 @@ static void ftrace_run_update_code(int command) */ arch_ftrace_update_code(command); -#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST - /* - * For archs that call ftrace_test_stop_func(), we must - * wait till after we update all the function callers - * before we update the callback. This keeps different - * ops that record different functions from corrupting - * each other. 
- */ - __ftrace_trace_function = __ftrace_trace_function_delay; -#endif function_trace_stop--; ret = ftrace_arch_code_modify_post_process(); @@ -2441,8 +2520,9 @@ static int t_show(struct seq_file *m, void *v) seq_printf(m, "%ps", (void *)rec->ip); if (iter->flags & FTRACE_ITER_ENABLED) - seq_printf(m, " (%ld)", - rec->flags & ~FTRACE_FL_MASK); + seq_printf(m, " (%ld)%s", + rec->flags & ~FTRACE_FL_MASK, + rec->flags & FTRACE_FL_REGS ? " R" : ""); seq_printf(m, "\n"); return 0; @@ -2790,8 +2870,8 @@ static int __init ftrace_mod_cmd_init(void) } device_initcall(ftrace_mod_cmd_init); -static void -function_trace_probe_call(unsigned long ip, unsigned long parent_ip) +static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct ftrace_func_probe *entry; struct hlist_head *hhd; @@ -3162,8 +3242,27 @@ ftrace_notrace_write(struct file *file, const char __user *ubuf, } static int -ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, - int reset, int enable) +ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) +{ + struct ftrace_func_entry *entry; + + if (!ftrace_location(ip)) + return -EINVAL; + + if (remove) { + entry = ftrace_lookup_ip(hash, ip); + if (!entry) + return -ENOENT; + free_hash_entry(hash, entry); + return 0; + } + + return add_hash_entry(hash, ip); +} + +static int +ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, + unsigned long ip, int remove, int reset, int enable) { struct ftrace_hash **orig_hash; struct ftrace_hash *hash; @@ -3192,6 +3291,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, ret = -EINVAL; goto out_regex_unlock; } + if (ip) { + ret = ftrace_match_addr(hash, ip, remove); + if (ret < 0) + goto out_regex_unlock; + } mutex_lock(&ftrace_lock); ret = ftrace_hash_move(ops, enable, orig_hash, hash); @@ -3208,6 +3312,37 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int 
len, return ret; } +static int +ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, + int reset, int enable) +{ + return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable); +} + +/** + * ftrace_set_filter_ip - set a function to filter on in ftrace by address + * @ops - the ops to set the filter with + * @ip - the address to add to or remove from the filter. + * @remove - non zero to remove the ip from the filter + * @reset - non zero to reset all filters before applying this filter. + * + * Filters denote which functions should be enabled when tracing is enabled + * If @ip is NULL, it failes to update filter. + */ +int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, + int remove, int reset) +{ + return ftrace_set_addr(ops, ip, remove, reset, 1); +} +EXPORT_SYMBOL_GPL(ftrace_set_filter_ip); + +static int +ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len, + int reset, int enable) +{ + return ftrace_set_hash(ops, buf, len, 0, 0, reset, enable); +} + /** * ftrace_set_filter - set a function to filter on in ftrace * @ops - the ops to set the filter with @@ -3912,6 +4047,7 @@ void __init ftrace_init(void) static struct ftrace_ops global_ops = { .func = ftrace_stub, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static int __init ftrace_nodyn_init(void) @@ -3942,10 +4078,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip) #endif /* CONFIG_DYNAMIC_FTRACE */ static void -ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) +ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) { - struct ftrace_ops *op; - if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT))) return; @@ -3959,7 +4094,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) while (op != &ftrace_list_end) { if (!ftrace_function_local_disabled(op) && ftrace_ops_test(op, ip)) - op->func(ip, parent_ip); + op->func(ip, parent_ip, op, regs); op = 
rcu_dereference_raw(op->next); }; @@ -3969,13 +4104,18 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops control_ops = { .func = ftrace_ops_control_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; -static void -ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) +static inline void +__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ignored, struct pt_regs *regs) { struct ftrace_ops *op; + if (function_trace_stop) + return; + if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT))) return; @@ -3988,13 +4128,39 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) op = rcu_dereference_raw(ftrace_ops_list); while (op != &ftrace_list_end) { if (ftrace_ops_test(op, ip)) - op->func(ip, parent_ip); + op->func(ip, parent_ip, op, regs); op = rcu_dereference_raw(op->next); }; preempt_enable_notrace(); trace_recursion_clear(TRACE_INTERNAL_BIT); } +/* + * Some archs only support passing ip and parent_ip. Even though + * the list function ignores the op parameter, we do not want any + * C side effects, where a function is called without the caller + * sending a third parameter. + * Archs are to support both the regs and ftrace_ops at the same time. + * If they support ftrace_ops, it is assumed they support regs. + * If call backs want to use regs, they must either check for regs + * being NULL, or ARCH_SUPPORTS_FTRACE_SAVE_REGS. + * Note, ARCH_SUPPORTS_FTRACE_SAVE_REGS expects a full regs to be saved. + * An architecture can pass partial regs with ftrace_ops and still + * set the ARCH_SUPPORTS_FTRACE_OPS. 
+ */ +#if ARCH_SUPPORTS_FTRACE_OPS +static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *regs) +{ + __ftrace_ops_list_func(ip, parent_ip, NULL, regs); +} +#else +static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) +{ + __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); +} +#endif + static void clear_ftrace_swapper(void) { struct task_struct *p; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 49491fa7daa2..b32ed0e385a5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2816,7 +2816,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable); * to the buffer after this will fail and return NULL. * * This is different than ring_buffer_record_disable() as - * it works like an on/off switch, where as the disable() verison + * it works like an on/off switch, where as the disable() version * must be paired with a enable(). */ void ring_buffer_record_off(struct ring_buffer *buffer) @@ -2839,7 +2839,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_off); * ring_buffer_record_off(). * * This is different than ring_buffer_record_enable() as - * it works like an on/off switch, where as the enable() verison + * it works like an on/off switch, where as the enable() version * must be paired with a disable(). 
*/ void ring_buffer_record_on(struct ring_buffer *buffer) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5c38c81496ce..08acf42e325b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -426,15 +426,15 @@ __setup("trace_buf_size=", set_buf_size); static int __init set_tracing_thresh(char *str) { - unsigned long threshhold; + unsigned long threshold; int ret; if (!str) return 0; - ret = strict_strtoul(str, 0, &threshhold); + ret = strict_strtoul(str, 0, &threshold); if (ret < 0) return 0; - tracing_thresh = threshhold * 1000; + tracing_thresh = threshold * 1000; return 1; } __setup("tracing_thresh=", set_tracing_thresh); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 55e1f7f0db12..593debefc4e9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -472,11 +472,11 @@ extern void trace_find_cmdline(int pid, char comm[]); #ifdef CONFIG_DYNAMIC_FTRACE extern unsigned long ftrace_update_tot_cnt; +#endif #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func extern int DYN_FTRACE_TEST_NAME(void); #define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2 extern int DYN_FTRACE_TEST_NAME2(void); -#endif extern int ring_buffer_expanded; extern bool tracing_selftest_disabled; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 8a6d2ee2086c..84b1e045faba 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -258,7 +258,8 @@ EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); #ifdef CONFIG_FUNCTION_TRACER static void -perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip) +perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *pt_regs) { struct ftrace_entry *entry; struct hlist_head *head; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 29111da1d100..6825d833a257 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1681,7 +1681,8 @@ 
static __init void event_trace_self_tests(void) static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable); static void -function_test_events_call(unsigned long ip, unsigned long parent_ip) +function_test_events_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct ring_buffer_event *event; struct ring_buffer *buffer; @@ -1720,6 +1721,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __initdata = { .func = function_test_events_call, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static __init void event_trace_self_test_with_function(void) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 431dba8b7542..c154797a7ff7 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -2002,7 +2002,7 @@ static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter, static int __ftrace_function_set_filter(int filter, char *buf, int len, struct function_filter_data *data) { - int i, re_cnt, ret; + int i, re_cnt, ret = -EINVAL; int *reset; char **re; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index a426f410c060..483162a9f908 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -49,7 +49,8 @@ static void function_trace_start(struct trace_array *tr) } static void -function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) +function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct trace_array *tr = func_trace; struct trace_array_cpu *data; @@ -84,7 +85,9 @@ enum { static struct tracer_flags func_flags; static void -function_trace_call(unsigned long ip, unsigned long parent_ip) +function_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) + { struct trace_array *tr = func_trace; struct 
trace_array_cpu *data; @@ -121,7 +124,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } static void -function_stack_trace_call(unsigned long ip, unsigned long parent_ip) +function_stack_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct trace_array *tr = func_trace; struct trace_array_cpu *data; @@ -164,13 +168,13 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = function_trace_call, - .flags = FTRACE_OPS_FL_GLOBAL, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops trace_stack_ops __read_mostly = { .func = function_stack_trace_call, - .flags = FTRACE_OPS_FL_GLOBAL, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; static struct tracer_opt func_opts[] = { diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index ce27c8ba8d31..99b4378393d5 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -143,7 +143,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, return; } -#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST +#if defined(CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST) && !defined(CC_USING_FENTRY) /* * The arch may choose to record the frame pointer used * and check it here to make sure that it is what we expect it @@ -154,6 +154,9 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, * * Currently, x86_32 with optimize for size (-Os) makes the latest * gcc do the above. + * + * Note, -mfentry does not use frame pointers, and this test + * is not needed if CC_USING_FENTRY is set. 
*/ if (unlikely(current->ret_stack[index].fp != frame_pointer)) { ftrace_graph_stop(); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 99d20e920368..d98ee8283b29 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -136,7 +136,8 @@ static int func_prolog_dec(struct trace_array *tr, * irqsoff uses its own tracer function to keep the overhead down: */ static void -irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) +irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; @@ -153,7 +154,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = irqsoff_tracer_call, - .flags = FTRACE_OPS_FL_GLOBAL, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index ff791ea48b57..02170c00c413 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -108,7 +108,8 @@ out_enable: * wakeup uses its own tracer function to keep the overhead down: */ static void -wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) +wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; @@ -129,7 +130,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = wakeup_tracer_call, - .flags = FTRACE_OPS_FL_GLOBAL, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 288541f977fb..2c00a691a540 100644 --- a/kernel/trace/trace_selftest.c +++ 
b/kernel/trace/trace_selftest.c @@ -103,54 +103,67 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret) static int trace_selftest_test_probe1_cnt; static void trace_selftest_test_probe1_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_probe1_cnt++; } static int trace_selftest_test_probe2_cnt; static void trace_selftest_test_probe2_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_probe2_cnt++; } static int trace_selftest_test_probe3_cnt; static void trace_selftest_test_probe3_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_probe3_cnt++; } static int trace_selftest_test_global_cnt; static void trace_selftest_test_global_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_global_cnt++; } static int trace_selftest_test_dyn_cnt; static void trace_selftest_test_dyn_func(unsigned long ip, - unsigned long pip) + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) { trace_selftest_test_dyn_cnt++; } static struct ftrace_ops test_probe1 = { .func = trace_selftest_test_probe1_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops test_probe2 = { .func = trace_selftest_test_probe2_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops test_probe3 = { .func = trace_selftest_test_probe3_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static struct ftrace_ops test_global = { - .func = trace_selftest_test_global_func, - .flags = FTRACE_OPS_FL_GLOBAL, + .func = trace_selftest_test_global_func, + .flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE, }; static void print_counts(void) @@ -393,10 +406,253 @@ int 
trace_selftest_startup_dynamic_tracing(struct tracer *trace, return ret; } + +static int trace_selftest_recursion_cnt; +static void trace_selftest_test_recursion_func(unsigned long ip, + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) +{ + /* + * This function is registered without the recursion safe flag. + * The ftrace infrastructure should provide the recursion + * protection. If not, this will crash the kernel! + */ + trace_selftest_recursion_cnt++; + DYN_FTRACE_TEST_NAME(); +} + +static void trace_selftest_test_recursion_safe_func(unsigned long ip, + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) +{ + /* + * We said we would provide our own recursion. By calling + * this function again, we should recurse back into this function + * and count again. But this only happens if the arch supports + * all of ftrace features and nothing else is using the function + * tracing utility. + */ + if (trace_selftest_recursion_cnt++) + return; + DYN_FTRACE_TEST_NAME(); +} + +static struct ftrace_ops test_rec_probe = { + .func = trace_selftest_test_recursion_func, +}; + +static struct ftrace_ops test_recsafe_probe = { + .func = trace_selftest_test_recursion_safe_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, +}; + +static int +trace_selftest_function_recursion(void) +{ + int save_ftrace_enabled = ftrace_enabled; + int save_tracer_enabled = tracer_enabled; + char *func_name; + int len; + int ret; + int cnt; + + /* The previous test PASSED */ + pr_cont("PASSED\n"); + pr_info("Testing ftrace recursion: "); + + + /* enable tracing, and record the filter function */ + ftrace_enabled = 1; + tracer_enabled = 1; + + /* Handle PPC64 '.' 
name */ + func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); + len = strlen(func_name); + + ret = ftrace_set_filter(&test_rec_probe, func_name, len, 1); + if (ret) { + pr_cont("*Could not set filter* "); + goto out; + } + + ret = register_ftrace_function(&test_rec_probe); + if (ret) { + pr_cont("*could not register callback* "); + goto out; + } + + DYN_FTRACE_TEST_NAME(); + + unregister_ftrace_function(&test_rec_probe); + + ret = -1; + if (trace_selftest_recursion_cnt != 1) { + pr_cont("*callback not called once (%d)* ", + trace_selftest_recursion_cnt); + goto out; + } + + trace_selftest_recursion_cnt = 1; + + pr_cont("PASSED\n"); + pr_info("Testing ftrace recursion safe: "); + + ret = ftrace_set_filter(&test_recsafe_probe, func_name, len, 1); + if (ret) { + pr_cont("*Could not set filter* "); + goto out; + } + + ret = register_ftrace_function(&test_recsafe_probe); + if (ret) { + pr_cont("*could not register callback* "); + goto out; + } + + DYN_FTRACE_TEST_NAME(); + + unregister_ftrace_function(&test_recsafe_probe); + + /* + * If arch supports all ftrace features, and no other task + * was on the list, we should be fine. 
+ */ + if (!ftrace_nr_registered_ops() && !FTRACE_FORCE_LIST_FUNC) + cnt = 2; /* Should have recursed */ + else + cnt = 1; + + ret = -1; + if (trace_selftest_recursion_cnt != cnt) { + pr_cont("*callback not called expected %d times (%d)* ", + cnt, trace_selftest_recursion_cnt); + goto out; + } + + ret = 0; +out: + ftrace_enabled = save_ftrace_enabled; + tracer_enabled = save_tracer_enabled; + + return ret; +} #else # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) +# define trace_selftest_function_recursion() ({ 0; }) #endif /* CONFIG_DYNAMIC_FTRACE */ +static enum { + TRACE_SELFTEST_REGS_START, + TRACE_SELFTEST_REGS_FOUND, + TRACE_SELFTEST_REGS_NOT_FOUND, +} trace_selftest_regs_stat; + +static void trace_selftest_test_regs_func(unsigned long ip, + unsigned long pip, + struct ftrace_ops *op, + struct pt_regs *pt_regs) +{ + if (pt_regs) + trace_selftest_regs_stat = TRACE_SELFTEST_REGS_FOUND; + else + trace_selftest_regs_stat = TRACE_SELFTEST_REGS_NOT_FOUND; +} + +static struct ftrace_ops test_regs_probe = { + .func = trace_selftest_test_regs_func, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_SAVE_REGS, +}; + +static int +trace_selftest_function_regs(void) +{ + int save_ftrace_enabled = ftrace_enabled; + int save_tracer_enabled = tracer_enabled; + char *func_name; + int len; + int ret; + int supported = 0; + +#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS + supported = 1; +#endif + + /* The previous test PASSED */ + pr_cont("PASSED\n"); + pr_info("Testing ftrace regs%s: ", + !supported ? "(no arch support)" : ""); + + /* enable tracing, and record the filter function */ + ftrace_enabled = 1; + tracer_enabled = 1; + + /* Handle PPC64 '.' name */ + func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); + len = strlen(func_name); + + ret = ftrace_set_filter(&test_regs_probe, func_name, len, 1); + /* + * If DYNAMIC_FTRACE is not set, then we just trace all functions. + * This test really doesn't care. 
+ */ + if (ret && ret != -ENODEV) { + pr_cont("*Could not set filter* "); + goto out; + } + + ret = register_ftrace_function(&test_regs_probe); + /* + * Now if the arch does not support passing regs, then this should + * have failed. + */ + if (!supported) { + if (!ret) { + pr_cont("*registered save-regs without arch support* "); + goto out; + } + test_regs_probe.flags |= FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED; + ret = register_ftrace_function(&test_regs_probe); + } + if (ret) { + pr_cont("*could not register callback* "); + goto out; + } + + + DYN_FTRACE_TEST_NAME(); + + unregister_ftrace_function(&test_regs_probe); + + ret = -1; + + switch (trace_selftest_regs_stat) { + case TRACE_SELFTEST_REGS_START: + pr_cont("*callback never called* "); + goto out; + + case TRACE_SELFTEST_REGS_FOUND: + if (supported) + break; + pr_cont("*callback received regs without arch support* "); + goto out; + + case TRACE_SELFTEST_REGS_NOT_FOUND: + if (!supported) + break; + pr_cont("*callback received NULL regs* "); + goto out; + } + + ret = 0; +out: + ftrace_enabled = save_ftrace_enabled; + tracer_enabled = save_tracer_enabled; + + return ret; +} + /* * Simple verification test of ftrace function tracer. 
* Enable ftrace, sleep 1/10 second, and then read the trace @@ -442,7 +698,14 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ret = trace_selftest_startup_dynamic_tracing(trace, tr, DYN_FTRACE_TEST_NAME); + if (ret) + goto out; + ret = trace_selftest_function_recursion(); + if (ret) + goto out; + + ret = trace_selftest_function_regs(); out: ftrace_enabled = save_ftrace_enabled; tracer_enabled = save_tracer_enabled; @@ -778,6 +1041,8 @@ static int trace_wakeup_test_thread(void *data) set_current_state(TASK_INTERRUPTIBLE); schedule(); + complete(x); + /* we are awake, now wait to disappear */ while (!kthread_should_stop()) { /* @@ -821,24 +1086,21 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) /* reset the max latency */ tracing_max_latency = 0; - /* sleep to let the RT thread sleep too */ - msleep(100); + while (p->on_rq) { + /* + * Sleep to make sure the RT thread is asleep too. + * On virtual machines we can't rely on timings, + * but we want to make sure this test still works. + */ + msleep(100); + } - /* - * Yes this is slightly racy. It is possible that for some - * strange reason that the RT thread we created, did not - * call schedule for 100ms after doing the completion, - * and we do a wakeup on a task that already is awake. - * But that is extremely unlikely, and the worst thing that - * happens in such a case, is that we disable tracing. - * Honestly, if this race does happen something is horrible - * wrong with the system. - */ + init_completion(&isrt); wake_up_process(p); - /* give a little time to let the thread wake up */ - msleep(100); + /* Wait for the task to wake up */ + wait_for_completion(&isrt); /* stop the tracing. 
*/ tracing_stop(); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index d4545f49242e..0c1b165778e5 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -111,7 +111,8 @@ static inline void check_stack(void) } static void -stack_trace_call(unsigned long ip, unsigned long parent_ip) +stack_trace_call(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct pt_regs *pt_regs) { int cpu; @@ -136,6 +137,7 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip) static struct ftrace_ops trace_ops __read_mostly = { .func = stack_trace_call, + .flags = FTRACE_OPS_FL_RECURSION_SAFE, }; static ssize_t diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 60e4d7875672..6b245f64c8dd 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -506,6 +506,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) int size; syscall_nr = syscall_get_nr(current, regs); + if (syscall_nr < 0) + return; if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) return; @@ -580,6 +582,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) int size; syscall_nr = syscall_get_nr(current, regs); + if (syscall_nr < 0) + return; if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) return; diff --git a/mm/filemap.c b/mm/filemap.c index fa5ca304148e..384344575c37 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1412,12 +1412,8 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, retval = filemap_write_and_wait_range(mapping, pos, pos + iov_length(iov, nr_segs) - 1); if (!retval) { - struct blk_plug plug; - - blk_start_plug(&plug); retval = mapping->a_ops->direct_IO(READ, iocb, iov, pos, nr_segs); - blk_finish_plug(&plug); } if (retval > 0) { *ppos = pos + retval; @@ -2527,14 +2523,12 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, { struct file *file = iocb->ki_filp; struct inode *inode 
= file->f_mapping->host; - struct blk_plug plug; ssize_t ret; BUG_ON(iocb->ki_pos != pos); sb_start_write(inode->i_sb); mutex_lock(&inode->i_mutex); - blk_start_plug(&plug); ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); @@ -2545,7 +2539,6 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err < 0 && ret > 0) ret = err; } - blk_finish_plug(&plug); sb_end_write(inode->i_sb); return ret; } diff --git a/mm/mmap.c b/mm/mmap.c index 9adee9fc0d8a..ae18a48e7e4e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1356,9 +1356,8 @@ out: } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) make_pages_present(addr, addr + len); - if (file && uprobe_mmap(vma)) - /* matching probes but cannot insert */ - goto unmap_and_free_vma; + if (file) + uprobe_mmap(vma); return addr; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 88f2bf671960..bac973a31367 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -316,7 +316,6 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) */ void svc_xprt_enqueue(struct svc_xprt *xprt) { - struct svc_serv *serv = xprt->xpt_server; struct svc_pool *pool; struct svc_rqst *rqstp; int cpu; @@ -362,8 +361,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) rqstp, rqstp->rq_xprt); rqstp->rq_xprt = xprt; svc_xprt_get(xprt); - rqstp->rq_reserved = serv->sv_max_mesg; - atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); pool->sp_stats.threads_woken++; wake_up(&rqstp->rq_wait); } else { @@ -640,8 +637,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) if (xprt) { rqstp->rq_xprt = xprt; svc_xprt_get(xprt); - rqstp->rq_reserved = serv->sv_max_mesg; - atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); /* As there is a shortage of threads and this request * had to be queued, don't allow the thread to wait so @@ -738,6 +733,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) else len = xprt->xpt_ops->xpo_recvfrom(rqstp); 
dprintk("svc: got len=%d\n", len); + rqstp->rq_reserved = serv->sv_max_mesg; + atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } svc_xprt_received(xprt); @@ -794,7 +791,8 @@ int svc_send(struct svc_rqst *rqstp) /* Grab mutex to serialize outgoing data. */ mutex_lock(&xprt->xpt_mutex); - if (test_bit(XPT_DEAD, &xprt->xpt_flags)) + if (test_bit(XPT_DEAD, &xprt->xpt_flags) + || test_bit(XPT_CLOSE, &xprt->xpt_flags)) len = -ENOTCONN; else len = xprt->xpt_ops->xpo_sendto(rqstp); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 18bc130255a7..998aa8c1807c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1129,9 +1129,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len >= 0) svsk->sk_tcplen += len; if (len != want) { + svc_tcp_save_pages(svsk, rqstp); if (len < 0 && len != -EAGAIN) goto err_other; - svc_tcp_save_pages(svsk, rqstp); dprintk("svc: incomplete TCP record (%d of %d)\n", svsk->sk_tcplen, svsk->sk_reclen); goto err_noclose; diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 54e35c1e5948..9d1421e63ff8 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -261,11 +261,13 @@ static unsigned get_mcountsym(Elf_Sym const *const sym0, &sym0[Elf_r_sym(relp)]; char const *symname = &str0[w(symp->st_name)]; char const *mcount = gpfx == '_' ? 
"_mcount" : "mcount"; + char const *fentry = "__fentry__"; if (symname[0] == '.') ++symname; /* ppc64 hack */ if (strcmp(mcount, symname) == 0 || - (altmcount && strcmp(altmcount, symname) == 0)) + (altmcount && strcmp(altmcount, symname) == 0) || + (strcmp(fentry, symname) == 0)) mcountsym = Elf_r_sym(relp); return mcountsym; diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 5f34aa371b56..b5b4d806ffa2 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -31,6 +31,7 @@ #include <stdarg.h> #include <ctype.h> #include <errno.h> +#include <stdint.h> #include "event-parse.h" #include "event-utils.h" @@ -3485,7 +3486,7 @@ process_defined_func(struct trace_seq *s, void *data, int size, if (!string->str) die("malloc str"); - args[i] = (unsigned long long)string->str; + args[i] = (uintptr_t)string->str; strings = string; trace_seq_destroy(&str); break; @@ -4685,9 +4686,8 @@ static int find_event_handle(struct pevent *pevent, struct event_format *event) * * /sys/kernel/debug/tracing/events/.../.../format */ -int pevent_parse_event(struct pevent *pevent, - const char *buf, unsigned long size, - const char *sys) +enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, + unsigned long size, const char *sys) { struct event_format *event; int ret; @@ -4696,17 +4696,16 @@ int pevent_parse_event(struct pevent *pevent, event = alloc_event(); if (!event) - return -ENOMEM; + return PEVENT_ERRNO__MEM_ALLOC_FAILED; event->name = event_read_name(); if (!event->name) { /* Bad event? 
*/ - free(event); - return -1; + ret = PEVENT_ERRNO__MEM_ALLOC_FAILED; + goto event_alloc_failed; } if (strcmp(sys, "ftrace") == 0) { - event->flags |= EVENT_FL_ISFTRACE; if (strcmp(event->name, "bprint") == 0) @@ -4714,20 +4713,28 @@ int pevent_parse_event(struct pevent *pevent, } event->id = event_read_id(); - if (event->id < 0) - die("failed to read event id"); + if (event->id < 0) { + ret = PEVENT_ERRNO__READ_ID_FAILED; + /* + * This isn't an allocation error actually. + * But as the ID is critical, just bail out. + */ + goto event_alloc_failed; + } event->system = strdup(sys); - if (!event->system) - die("failed to allocate system"); + if (!event->system) { + ret = PEVENT_ERRNO__MEM_ALLOC_FAILED; + goto event_alloc_failed; + } /* Add pevent to event so that it can be referenced */ event->pevent = pevent; ret = event_read_format(event); if (ret < 0) { - do_warning("failed to read event format for %s", event->name); - goto event_failed; + ret = PEVENT_ERRNO__READ_FORMAT_FAILED; + goto event_parse_failed; } /* @@ -4739,10 +4746,9 @@ int pevent_parse_event(struct pevent *pevent, ret = event_read_print(event); if (ret < 0) { - do_warning("failed to read event print fmt for %s", - event->name); show_warning = 1; - goto event_failed; + ret = PEVENT_ERRNO__READ_PRINT_FAILED; + goto event_parse_failed; } show_warning = 1; @@ -4753,20 +4759,19 @@ int pevent_parse_event(struct pevent *pevent, struct print_arg *arg, **list; /* old ftrace had no args */ - list = &event->print_fmt.args; for (field = event->format.fields; field; field = field->next) { arg = alloc_arg(); - *list = arg; - list = &arg->next; arg->type = PRINT_FIELD; arg->field.name = strdup(field->name); if (!arg->field.name) { - do_warning("failed to allocate field name"); event->flags |= EVENT_FL_FAILED; - return -1; + free_arg(arg); + return PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED; } arg->field.field = field; + *list = arg; + list = &arg->next; } return 0; } @@ -4777,11 +4782,65 @@ int pevent_parse_event(struct 
pevent *pevent, return 0; - event_failed: + event_parse_failed: event->flags |= EVENT_FL_FAILED; /* still add it even if it failed */ add_event(pevent, event); - return -1; + return ret; + + event_alloc_failed: + free(event->system); + free(event->name); + free(event); + return ret; +} + +#undef _PE +#define _PE(code, str) str +static const char * const pevent_error_str[] = { + PEVENT_ERRORS +}; +#undef _PE + +int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum, + char *buf, size_t buflen) +{ + int idx; + const char *msg; + + if (errnum >= 0) { + msg = strerror_r(errnum, buf, buflen); + if (msg != buf) { + size_t len = strlen(msg); + char *c = mempcpy(buf, msg, min(buflen-1, len)); + *c = '\0'; + } + return 0; + } + + if (errnum <= __PEVENT_ERRNO__START || + errnum >= __PEVENT_ERRNO__END) + return -1; + + idx = errnum - __PEVENT_ERRNO__START - 1; + msg = pevent_error_str[idx]; + + switch (errnum) { + case PEVENT_ERRNO__MEM_ALLOC_FAILED: + case PEVENT_ERRNO__PARSE_EVENT_FAILED: + case PEVENT_ERRNO__READ_ID_FAILED: + case PEVENT_ERRNO__READ_FORMAT_FAILED: + case PEVENT_ERRNO__READ_PRINT_FAILED: + case PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED: + snprintf(buf, buflen, "%s", msg); + break; + + default: + /* cannot reach here */ + break; + } + + return 0; } int get_field_val(struct trace_seq *s, struct format_field *field, diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 5772ad8cb386..527df038a25f 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -345,6 +345,34 @@ enum pevent_flag { PEVENT_NSEC_OUTPUT = 1, /* output in NSECS */ }; +#define PEVENT_ERRORS \ + _PE(MEM_ALLOC_FAILED, "failed to allocate memory"), \ + _PE(PARSE_EVENT_FAILED, "failed to parse event"), \ + _PE(READ_ID_FAILED, "failed to read event id"), \ + _PE(READ_FORMAT_FAILED, "failed to read event format"), \ + _PE(READ_PRINT_FAILED, "failed to read event print fmt"), \ + _PE(OLD_FTRACE_ARG_FAILED,"failed to 
allocate field name for ftrace") + +#undef _PE +#define _PE(__code, __str) PEVENT_ERRNO__ ## __code +enum pevent_errno { + PEVENT_ERRNO__SUCCESS = 0, + + /* + * Choose an arbitrary negative big number not to clash with standard + * errno since SUS requires the errno has distinct positive values. + * See 'Issue 6' in the link below. + * + * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html + */ + __PEVENT_ERRNO__START = -100000, + + PEVENT_ERRORS, + + __PEVENT_ERRNO__END, +}; +#undef _PE + struct cmdline; struct cmdline_list; struct func_map; @@ -509,8 +537,8 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long size, int long_size); -int pevent_parse_event(struct pevent *pevent, const char *buf, - unsigned long size, const char *sys); +enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf, + unsigned long size, const char *sys); void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event, const char *name, struct pevent_record *record, @@ -561,6 +589,8 @@ int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec); const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid); void pevent_event_info(struct trace_seq *s, struct event_format *event, struct pevent_record *record); +int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum, + char *buf, size_t buflen); struct event_format **pevent_list_events(struct pevent *pevent, enum event_sort_type); struct format_field **pevent_event_common_fields(struct event_format *event); diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h index 08296383d1e6..bc075006966e 100644 --- a/tools/lib/traceevent/event-utils.h +++ b/tools/lib/traceevent/event-utils.h @@ -39,6 +39,12 @@ void __vdie(const char *fmt, ...); void __vwarning(const char *fmt, ...); void __vpr_stat(const char *fmt, ...); +#define min(x, y) ({ \ + 
typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + static inline char *strim(char *string) { char *ret; diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index ca600e09c8d4..9f2e44f2b17a 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -195,10 +195,10 @@ install-pdf: pdf #install-html: html # '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) -../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE - $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE +$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE + $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE --include ../PERF-VERSION-FILE +-include $(OUTPUT)PERF-VERSION-FILE # # Determine "include::" file references in asciidoc files. diff --git a/tools/perf/Documentation/jit-interface.txt b/tools/perf/Documentation/jit-interface.txt new file mode 100644 index 000000000000..a8656f564915 --- /dev/null +++ b/tools/perf/Documentation/jit-interface.txt @@ -0,0 +1,15 @@ +perf supports a simple JIT interface to resolve symbols for dynamic code generated +by a JIT. + +The JIT has to write a /tmp/perf-%d.map (%d = pid of process) file + +This is a text file. + +Each line has the following format, fields separated with spaces: + +START SIZE symbolname + +START and SIZE are hex numbers without 0x. +symbolname is the rest of the line, so it could contain special characters. + +The ownership of the file has to match the process. diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index ddc22525228d..d1e39dc8c810 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -15,24 +15,43 @@ DESCRIPTION This command displays the symbolic event types which can be selected in the various perf commands with the -e option. 
+[[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- Events can optionally have a modifier by appending a colon and one or -more modifiers. Modifiers allow the user to restrict when events are -counted with 'u' for user-space, 'k' for kernel, 'h' for hypervisor. -Additional modifiers are 'G' for guest counting (in KVM guests) and 'H' -for host counting (not in KVM guests). +more modifiers. Modifiers allow the user to restrict the events to be +counted. The following modifiers exist: + + u - user-space counting + k - kernel counting + h - hypervisor counting + G - guest counting (in KVM guests) + H - host counting (not in KVM guests) + p - precise level The 'p' modifier can be used for specifying how precise the instruction -address should be. The 'p' modifier is currently only implemented for -Intel PEBS and can be specified multiple times: - 0 - SAMPLE_IP can have arbitrary skid - 1 - SAMPLE_IP must have constant skid - 2 - SAMPLE_IP requested to have 0 skid - 3 - SAMPLE_IP must have 0 skid +address should be. The 'p' modifier can be specified multiple times: + + 0 - SAMPLE_IP can have arbitrary skid + 1 - SAMPLE_IP must have constant skid + 2 - SAMPLE_IP requested to have 0 skid + 3 - SAMPLE_IP must have 0 skid + +For Intel systems precise event sampling is implemented with PEBS +which supports up to precise-level 2. -The PEBS implementation now supports up to 2. +On AMD systems it is implemented using IBS (up to precise-level 2). +The precise modifier works with event types 0x76 (cpu-cycles, CPU +clocks not halted) and 0xC1 (micro-ops retired). Both events map to +IBS execution sampling (IBS op) with the IBS Op Counter Control bit +(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s +Manual Volume 2: System Programming, 13.3 Instruction-Based +Sampling). Examples to use IBS: + + perf record -a -e cpu-cycles:p ... # use ibs op counting cycles + perf record -a -e r076:p ... # same as -e cpu-cycles:p + perf record -a -e r0C1:p ...
# use ibs op counting micro-ops RAW HARDWARE EVENT DESCRIPTOR ----------------------------- @@ -44,6 +63,11 @@ layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Softwar of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344, Figure 13-7 Performance Event-Select Register (PerfEvtSeln)). +Note: Only the following bit fields can be set in x86 counter +registers: event, umask, edge, inv, cmask. Esp. guest/host only and +OS/user mode flags must be set up using <<EVENT_MODIFIERS, EVENT +MODIFIERS>>. + Example: If the Intel docs for a QM720 Core i7 describe an event as: @@ -91,4 +115,4 @@ SEE ALSO linkperf:perf-stat[1], linkperf:perf-top[1], linkperf:perf-record[1], http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide], -http://support.amd.com/us/Processor_TechDocs/24593.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] +http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming] diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt index 3152cca15501..d00bef231340 100644 --- a/tools/perf/Documentation/perf-script-perl.txt +++ b/tools/perf/Documentation/perf-script-perl.txt @@ -116,8 +116,8 @@ search path and 'use'ing a few support modules (see module descriptions below): ---- - use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/perf-script-Util/lib"; - use lib "./perf-script-Util/lib"; + use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; + use lib "./Perf-Trace-Util/lib"; use Perf::Trace::Core; use Perf::Trace::Context; use Perf::Trace::Util; diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 471022069119..a4027f221a53 100644 ---
a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -129,7 +129,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -216,7 +216,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -279,7 +279,7 @@ import os import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * @@ -391,7 +391,7 @@ drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 . drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 .. drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin -rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py -drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 perf-script-Util +drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util -rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py ---- @@ -518,7 +518,7 @@ descriptions below): import sys sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/perf-script-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from perf_trace_context import * from Core import * diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 35655c3a7b7a..722ddee61f9f 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -37,7 +37,14 @@ include config/utilities.mak # # Define NO_NEWT if you do not want TUI support. # +# Define NO_GTK2 if you do not want GTK+ GUI support. +# # Define NO_DEMANGLE if you do not want C++ symbol demangling. +# +# Define NO_LIBELF if you do not want libelf dependency (e.g. 
cross-builds) +# +# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf +# backtrace post unwind. $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) @@ -50,13 +57,16 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ -e s/sh[234].*/sh/ ) +NO_PERF_REGS := 1 CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar # Additional ARCH settings for x86 ifeq ($(ARCH),i386) - ARCH := x86 + ARCH := x86 + NO_PERF_REGS := 0 + LIBUNWIND_LIBS = -lunwind -lunwind-x86 endif ifeq ($(ARCH),x86_64) ARCH := x86 @@ -69,6 +79,8 @@ ifeq ($(ARCH),x86_64) ARCH_CFLAGS := -DARCH_X86_64 ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S endif + NO_PERF_REGS := 0 + LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 endif # Treat warnings as errors unless directed not to @@ -89,7 +101,7 @@ ifdef PARSER_DEBUG PARSER_DEBUG_CFLAGS := -DPARSER_DEBUG endif -CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) +CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS) EXTLIBS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE ALL_LDFLAGS = $(LDFLAGS) @@ -186,10 +198,10 @@ SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) TRACE_EVENT_DIR = ../lib/traceevent/ -ifeq ("$(origin O)", "command line") - TE_PATH=$(OUTPUT)/ +ifneq ($(OUTPUT),) + TE_PATH=$(OUTPUT) else - TE_PATH=$(TRACE_EVENT_DIR)/ + TE_PATH=$(TRACE_EVENT_DIR) endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a @@ -321,6 +333,9 @@ LIB_H += $(TRACE_EVENT_DIR)event-parse.h LIB_H += util/target.h LIB_H += util/rblist.h LIB_H += util/intlist.h +LIB_H += util/perf_regs.h +LIB_H += util/unwind.h +LIB_H += 
ui/helpline.h LIB_OBJS += $(OUTPUT)util/abspath.o LIB_OBJS += $(OUTPUT)util/alias.o @@ -356,6 +371,7 @@ LIB_OBJS += $(OUTPUT)util/usage.o LIB_OBJS += $(OUTPUT)util/wrapper.o LIB_OBJS += $(OUTPUT)util/sigchain.o LIB_OBJS += $(OUTPUT)util/symbol.o +LIB_OBJS += $(OUTPUT)util/symbol-elf.o LIB_OBJS += $(OUTPUT)util/dso-test-data.o LIB_OBJS += $(OUTPUT)util/color.o LIB_OBJS += $(OUTPUT)util/pager.o @@ -387,11 +403,11 @@ LIB_OBJS += $(OUTPUT)util/cgroup.o LIB_OBJS += $(OUTPUT)util/target.o LIB_OBJS += $(OUTPUT)util/rblist.o LIB_OBJS += $(OUTPUT)util/intlist.o +LIB_OBJS += $(OUTPUT)ui/helpline.o +LIB_OBJS += $(OUTPUT)ui/stdio/hist.o BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o - BUILTIN_OBJS += $(OUTPUT)builtin-bench.o - # Benchmark modules BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o @@ -449,34 +465,73 @@ PYRF_OBJS += $(OUTPUT)util/xyarray.o -include config.mak.autogen -include config.mak -ifndef NO_DWARF -FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS) -ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y) - msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. 
Please install new elfutils-devel/libdw-dev); +ifdef NO_LIBELF NO_DWARF := 1 -endif # Dwarf support -endif # NO_DWARF - --include arch/$(ARCH)/Makefile - -ifneq ($(OUTPUT),) - BASIC_CFLAGS += -I$(OUTPUT) -endif - + NO_DEMANGLE := 1 + NO_LIBUNWIND := 1 +else FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF)),y) FLAGS_GLIBC=$(ALL_CFLAGS) $(ALL_LDFLAGS) ifneq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC)),y) msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); else - msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel); + NO_LIBELF := 1 + NO_DWARF := 1 + NO_DEMANGLE := 1 endif endif +endif # NO_LIBELF + +ifndef NO_LIBUNWIND +# for linking with debug library, run like: +# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ +ifdef LIBUNWIND_DIR + LIBUNWIND_CFLAGS := -I$(LIBUNWIND_DIR)/include + LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib +endif + +FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(ALL_CFLAGS) $(LIBUNWIND_LDFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS) +ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND)),y) + msg := $(warning No libunwind found, disabling post unwind support. 
Please install libunwind-dev[el] >= 0.99); + NO_LIBUNWIND := 1 +endif # Libunwind support +endif # NO_LIBUNWIND + +-include arch/$(ARCH)/Makefile + +ifneq ($(OUTPUT),) + BASIC_CFLAGS += -I$(OUTPUT) +endif + +ifdef NO_LIBELF +BASIC_CFLAGS += -DNO_LIBELF_SUPPORT + +EXTLIBS := $(filter-out -lelf,$(EXTLIBS)) + +# Remove ELF/DWARF dependent codes +LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS)) +LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS)) +LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS)) +LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS)) + +BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS)) + +# Use minimal symbol handling +LIB_OBJS += $(OUTPUT)util/symbol-minimal.o + +else # NO_LIBELF ifneq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_COMMON)),y) BASIC_CFLAGS += -DLIBELF_NO_MMAP endif +FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS) +ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y) + msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. 
Please install new elfutils-devel/libdw-dev); + NO_DWARF := 1 +endif # Dwarf support + ifndef NO_DWARF ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled); @@ -487,6 +542,16 @@ else LIB_OBJS += $(OUTPUT)util/dwarf-aux.o endif # PERF_HAVE_DWARF_REGS endif # NO_DWARF +endif # NO_LIBELF + +ifdef NO_LIBUNWIND + BASIC_CFLAGS += -DNO_LIBUNWIND_SUPPORT +else + EXTLIBS += $(LIBUNWIND_LIBS) + BASIC_CFLAGS := $(LIBUNWIND_CFLAGS) $(BASIC_CFLAGS) + BASIC_LDFLAGS := $(LIBUNWIND_LDFLAGS) $(BASIC_LDFLAGS) + LIB_OBJS += $(OUTPUT)util/unwind.o +endif ifdef NO_NEWT BASIC_CFLAGS += -DNO_NEWT_SUPPORT @@ -504,14 +569,13 @@ else LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o LIB_OBJS += $(OUTPUT)ui/browsers/hists.o LIB_OBJS += $(OUTPUT)ui/browsers/map.o - LIB_OBJS += $(OUTPUT)ui/helpline.o LIB_OBJS += $(OUTPUT)ui/progress.o LIB_OBJS += $(OUTPUT)ui/util.o LIB_OBJS += $(OUTPUT)ui/tui/setup.o LIB_OBJS += $(OUTPUT)ui/tui/util.o + LIB_OBJS += $(OUTPUT)ui/tui/helpline.o LIB_H += ui/browser.h LIB_H += ui/browsers/map.h - LIB_H += ui/helpline.h LIB_H += ui/keysyms.h LIB_H += ui/libslang.h LIB_H += ui/progress.h @@ -523,7 +587,7 @@ endif ifdef NO_GTK2 BASIC_CFLAGS += -DNO_GTK2_SUPPORT else - FLAGS_GTK2=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0) + FLAGS_GTK2=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null) ifneq ($(call try-cc,$(SOURCE_GTK2),$(FLAGS_GTK2)),y) msg := $(warning GTK2 not found, disables GTK2 support. 
Please install gtk2-devel or libgtk2.0-dev); BASIC_CFLAGS += -DNO_GTK2_SUPPORT @@ -531,11 +595,12 @@ else ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2)),y) BASIC_CFLAGS += -DHAVE_GTK_INFO_BAR endif - BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0) - EXTLIBS += $(shell pkg-config --libs gtk+-2.0) + BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null) + EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null) LIB_OBJS += $(OUTPUT)ui/gtk/browser.o LIB_OBJS += $(OUTPUT)ui/gtk/setup.o LIB_OBJS += $(OUTPUT)ui/gtk/util.o + LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o # Make sure that it'd be included only once. ifneq ($(findstring -DNO_NEWT_SUPPORT,$(BASIC_CFLAGS)),) LIB_OBJS += $(OUTPUT)ui/setup.o @@ -674,6 +739,13 @@ else endif endif +ifeq ($(NO_PERF_REGS),0) + ifeq ($(ARCH),x86) + LIB_H += arch/x86/include/perf_regs.h + endif +else + BASIC_CFLAGS += -DNO_PERF_REGS +endif ifdef NO_STRLCPY BASIC_CFLAGS += -DNO_STRLCPY @@ -700,6 +772,7 @@ perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) template_dir_SQ = $(subst ','\'',$(template_dir)) htmldir_SQ = $(subst ','\'',$(htmldir)) prefix_SQ = $(subst ','\'',$(prefix)) +sysconfdir_SQ = $(subst ','\'',$(sysconfdir)) SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) @@ -767,10 +840,10 @@ $(OUTPUT)perf.o perf.spec \ # over the general rule for .o $(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Iutil/ -w $< + $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(ALL_CFLAGS) -w $< $(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -Iutil/ -w $< + $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(ALL_CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $< $(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< @@ -842,7 +915,7 @@ $(LIB_FILE): $(LIB_OBJS) # libtraceevent.a $(LIBTRACEEVENT): - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) $(COMMAND_O) 
libtraceevent.a + $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a help: @echo 'Perf make targets:' @@ -951,6 +1024,8 @@ install: all $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace' $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python' $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d' + $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' install-python_ext: $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)' diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 744e629797be..815841c04eb2 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -2,4 +2,7 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif +ifndef NO_LIBUNWIND +LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o +endif LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h new file mode 100644 index 000000000000..46fc9f15c6b3 --- /dev/null +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -0,0 +1,80 @@ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include <stdlib.h> +#include "../../util/types.h" +#include "../../../../../arch/x86/include/asm/perf_regs.h" + +#ifndef ARCH_X86_64 +#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) +#else +#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ + (1ULL << PERF_REG_X86_ES) | \ + (1ULL << PERF_REG_X86_FS) | \ + (1ULL << PERF_REG_X86_GS)) +#define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT) +#endif +#define PERF_REG_IP PERF_REG_X86_IP +#define PERF_REG_SP PERF_REG_X86_SP + +static inline const char *perf_reg_name(int 
id) +{ + switch (id) { + case PERF_REG_X86_AX: + return "AX"; + case PERF_REG_X86_BX: + return "BX"; + case PERF_REG_X86_CX: + return "CX"; + case PERF_REG_X86_DX: + return "DX"; + case PERF_REG_X86_SI: + return "SI"; + case PERF_REG_X86_DI: + return "DI"; + case PERF_REG_X86_BP: + return "BP"; + case PERF_REG_X86_SP: + return "SP"; + case PERF_REG_X86_IP: + return "IP"; + case PERF_REG_X86_FLAGS: + return "FLAGS"; + case PERF_REG_X86_CS: + return "CS"; + case PERF_REG_X86_SS: + return "SS"; + case PERF_REG_X86_DS: + return "DS"; + case PERF_REG_X86_ES: + return "ES"; + case PERF_REG_X86_FS: + return "FS"; + case PERF_REG_X86_GS: + return "GS"; +#ifdef ARCH_X86_64 + case PERF_REG_X86_R8: + return "R8"; + case PERF_REG_X86_R9: + return "R9"; + case PERF_REG_X86_R10: + return "R10"; + case PERF_REG_X86_R11: + return "R11"; + case PERF_REG_X86_R12: + return "R12"; + case PERF_REG_X86_R13: + return "R13"; + case PERF_REG_X86_R14: + return "R14"; + case PERF_REG_X86_R15: + return "R15"; +#endif /* ARCH_X86_64 */ + default: + return NULL; + } + + return NULL; +} + +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/x86/util/unwind.c b/tools/perf/arch/x86/util/unwind.c new file mode 100644 index 000000000000..78d956eff96f --- /dev/null +++ b/tools/perf/arch/x86/util/unwind.c @@ -0,0 +1,111 @@ + +#include <errno.h> +#include <libunwind.h> +#include "perf_regs.h" +#include "../../util/unwind.h" + +#ifdef ARCH_X86_64 +int unwind__arch_reg_id(int regnum) +{ + int id; + + switch (regnum) { + case UNW_X86_64_RAX: + id = PERF_REG_X86_AX; + break; + case UNW_X86_64_RDX: + id = PERF_REG_X86_DX; + break; + case UNW_X86_64_RCX: + id = PERF_REG_X86_CX; + break; + case UNW_X86_64_RBX: + id = PERF_REG_X86_BX; + break; + case UNW_X86_64_RSI: + id = PERF_REG_X86_SI; + break; + case UNW_X86_64_RDI: + id = PERF_REG_X86_DI; + break; + case UNW_X86_64_RBP: + id = PERF_REG_X86_BP; + break; + case UNW_X86_64_RSP: + id = PERF_REG_X86_SP; + break; + case UNW_X86_64_R8: + id = 
PERF_REG_X86_R8; + break; + case UNW_X86_64_R9: + id = PERF_REG_X86_R9; + break; + case UNW_X86_64_R10: + id = PERF_REG_X86_R10; + break; + case UNW_X86_64_R11: + id = PERF_REG_X86_R11; + break; + case UNW_X86_64_R12: + id = PERF_REG_X86_R12; + break; + case UNW_X86_64_R13: + id = PERF_REG_X86_R13; + break; + case UNW_X86_64_R14: + id = PERF_REG_X86_R14; + break; + case UNW_X86_64_R15: + id = PERF_REG_X86_R15; + break; + case UNW_X86_64_RIP: + id = PERF_REG_X86_IP; + break; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return id; +} +#else +int unwind__arch_reg_id(int regnum) +{ + int id; + + switch (regnum) { + case UNW_X86_EAX: + id = PERF_REG_X86_AX; + break; + case UNW_X86_EDX: + id = PERF_REG_X86_DX; + break; + case UNW_X86_ECX: + id = PERF_REG_X86_CX; + break; + case UNW_X86_EBX: + id = PERF_REG_X86_BX; + break; + case UNW_X86_ESI: + id = PERF_REG_X86_SI; + break; + case UNW_X86_EDI: + id = PERF_REG_X86_DI; + break; + case UNW_X86_EBP: + id = PERF_REG_X86_BP; + break; + case UNW_X86_ESP: + id = PERF_REG_X86_SP; + break; + case UNW_X86_EIP: + id = PERF_REG_X86_IP; + break; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } + + return id; +} +#endif /* ARCH_X86_64 */ diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion new file mode 100644 index 000000000000..1958fa539d0f --- /dev/null +++ b/tools/perf/bash_completion @@ -0,0 +1,26 @@ +# perf completion + +have perf && +_perf() +{ + local cur cmd + + COMPREPLY=() + _get_comp_words_by_ref cur prev + + cmd=${COMP_WORDS[0]} + + # List perf subcommands + if [ $COMP_CWORD -eq 1 ]; then + cmds=$($cmd --list-cmds) + COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) ) + # List possible events for -e option + elif [[ $prev == "-e" && "${COMP_WORDS[1]}" == @(record|stat|top) ]]; then + cmds=$($cmd list --raw-dump) + COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) ) + # Fall down to list regular files + else + _filedir + fi +} && +complete -F 
_perf perf diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 6b2bcfbde150..7d6842826a0c 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -16,8 +16,6 @@ #include "util/session.h" #include "util/symbol.h" -#include <libelf.h> - static const char *input_name; static bool force; static bool show_kernel; @@ -71,7 +69,7 @@ static int perf_session__list_build_ids(void) { struct perf_session *session; - elf_version(EV_CURRENT); + symbol__elf_init(); session = perf_session__new(input_name, O_RDONLY, force, false, &build_id__mark_dso_hit_ops); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 3beab489afc5..64d8ba2fb7bc 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -191,10 +191,13 @@ static int perf_event__inject_buildid(struct perf_tool *tool, * If this fails, too bad, let the other side * account this as unresolved. */ - } else + } else { +#ifndef NO_LIBELF_SUPPORT pr_warning("no symbols found in %s, maybe " "install a debug package?\n", al.map->dso->long_name); +#endif + } } } diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index ce35015f2dc6..fc6607b383f2 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "perf.h" +#include "util/evsel.h" #include "util/util.h" #include "util/cache.h" #include "util/symbol.h" @@ -57,11 +58,6 @@ static unsigned long nr_allocs, nr_cross_allocs; #define PATH_SYS_NODE "/sys/devices/system/node" -struct perf_kmem { - struct perf_tool tool; - struct perf_session *session; -}; - static void init_cpunode_map(void) { FILE *fp; @@ -196,16 +192,15 @@ static void insert_caller_stat(unsigned long call_site, } } -static void process_alloc_event(void *data, - struct event_format *event, - int cpu, - u64 timestamp __used, - struct thread *thread __used, - int node) +static void perf_evsel__process_alloc_event(struct perf_evsel *evsel, 
+ struct perf_sample *sample, + int node) { + struct event_format *event = evsel->tp_format; + void *data = sample->raw_data; unsigned long call_site; unsigned long ptr; - int bytes_req; + int bytes_req, cpu = sample->cpu; int bytes_alloc; int node1, node2; @@ -257,22 +252,18 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr, return NULL; } -static void process_free_event(void *data, - struct event_format *event, - int cpu, - u64 timestamp __used, - struct thread *thread __used) +static void perf_evsel__process_free_event(struct perf_evsel *evsel, + struct perf_sample *sample) { - unsigned long ptr; + unsigned long ptr = raw_field_value(evsel->tp_format, "ptr", + sample->raw_data); struct alloc_stat *s_alloc, *s_caller; - ptr = raw_field_value(event, "ptr", data); - s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); if (!s_alloc) return; - if (cpu != s_alloc->alloc_cpu) { + if ((short)sample->cpu != s_alloc->alloc_cpu) { s_alloc->pingpong++; s_caller = search_alloc_stat(0, s_alloc->call_site, @@ -283,40 +274,34 @@ static void process_free_event(void *data, s_alloc->alloc_cpu = -1; } -static void process_raw_event(struct perf_tool *tool, - union perf_event *raw_event __used, void *data, - int cpu, u64 timestamp, struct thread *thread) +static void perf_evsel__process_kmem_event(struct perf_evsel *evsel, + struct perf_sample *sample) { - struct perf_kmem *kmem = container_of(tool, struct perf_kmem, tool); - struct event_format *event; - int type; - - type = trace_parse_common_type(kmem->session->pevent, data); - event = pevent_find_event(kmem->session->pevent, type); + struct event_format *event = evsel->tp_format; if (!strcmp(event->name, "kmalloc") || !strcmp(event->name, "kmem_cache_alloc")) { - process_alloc_event(data, event, cpu, timestamp, thread, 0); + perf_evsel__process_alloc_event(evsel, sample, 0); return; } if (!strcmp(event->name, "kmalloc_node") || !strcmp(event->name, "kmem_cache_alloc_node")) { - process_alloc_event(data, 
event, cpu, timestamp, thread, 1); + perf_evsel__process_alloc_event(evsel, sample, 1); return; } if (!strcmp(event->name, "kfree") || !strcmp(event->name, "kmem_cache_free")) { - process_free_event(data, event, cpu, timestamp, thread); + perf_evsel__process_free_event(evsel, sample); return; } } -static int process_sample_event(struct perf_tool *tool, +static int process_sample_event(struct perf_tool *tool __used, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __used, + struct perf_evsel *evsel, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, event->ip.pid); @@ -329,18 +314,14 @@ static int process_sample_event(struct perf_tool *tool, dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - process_raw_event(tool, event, sample->raw_data, sample->cpu, - sample->time, thread); - + perf_evsel__process_kmem_event(evsel, sample); return 0; } -static struct perf_kmem perf_kmem = { - .tool = { - .sample = process_sample_event, - .comm = perf_event__process_comm, - .ordered_samples = true, - }, +static struct perf_tool perf_kmem = { + .sample = process_sample_event, + .comm = perf_event__process_comm, + .ordered_samples = true, }; static double fragmentation(unsigned long n_req, unsigned long n_alloc) @@ -497,13 +478,10 @@ static int __cmd_kmem(void) int err = -EINVAL; struct perf_session *session; - session = perf_session__new(input_name, O_RDONLY, 0, false, - &perf_kmem.tool); + session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_kmem); if (session == NULL) return -ENOMEM; - perf_kmem.session = session; - if (perf_session__create_kernel_maps(session) < 0) goto out_delete; @@ -511,7 +489,7 @@ static int __cmd_kmem(void) goto out_delete; setup_pager(); - err = perf_session__process_events(session, &perf_kmem.tool); + err = perf_session__process_events(session, &perf_kmem); if (err != 0) goto out_delete; sort_result(); diff --git a/tools/perf/builtin-list.c 
b/tools/perf/builtin-list.c index 6313b6eb3ebb..bdcff81b532a 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -19,15 +19,15 @@ int cmd_list(int argc, const char **argv, const char *prefix __used) setup_pager(); if (argc == 1) - print_events(NULL); + print_events(NULL, false); else { int i; for (i = 1; i < argc; ++i) { - if (i > 1) + if (i > 2) putchar('\n'); if (strncmp(argv[i], "tracepoint", 10) == 0) - print_tracepoint_events(NULL, NULL); + print_tracepoint_events(NULL, NULL, false); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) print_events_type(PERF_TYPE_HARDWARE); @@ -36,13 +36,15 @@ int cmd_list(int argc, const char **argv, const char *prefix __used) print_events_type(PERF_TYPE_SOFTWARE); else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(NULL); + print_hwcache_events(NULL, false); + else if (strcmp(argv[i], "--raw-dump") == 0) + print_events(NULL, true); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i]); + print_events(argv[i], false); continue; } sep_idx = sep - argv[i]; @@ -51,7 +53,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __used) return -1; s[sep_idx] = '\0'; - print_tracepoint_events(s, s + sep_idx + 1); + print_tracepoint_events(s, s + sep_idx + 1, false); free(s); } } diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index b3c428548868..585aae2858b8 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1,6 +1,7 @@ #include "builtin.h" #include "perf.h" +#include "util/evsel.h" #include "util/util.h" #include "util/cache.h" #include "util/symbol.h" @@ -356,28 +357,16 @@ struct trace_release_event { struct trace_lock_handler { void (*acquire_event)(struct trace_acquire_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + const struct perf_sample *sample); void (*acquired_event)(struct trace_acquired_event *, 
- struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + const struct perf_sample *sample); void (*contended_event)(struct trace_contended_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + const struct perf_sample *sample); void (*release_event)(struct trace_release_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + const struct perf_sample *sample); }; static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr) @@ -416,10 +405,7 @@ enum acquire_flags { static void report_lock_acquire_event(struct trace_acquire_event *acquire_event, - struct event_format *__event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + const struct perf_sample *sample) { struct lock_stat *ls; struct thread_stat *ts; @@ -429,7 +415,7 @@ report_lock_acquire_event(struct trace_acquire_event *acquire_event, if (ls->discard) return; - ts = thread_stat_findnew(thread->pid); + ts = thread_stat_findnew(sample->tid); seq = get_seq(ts, acquire_event->addr); switch (seq->state) { @@ -473,18 +459,16 @@ broken: } ls->nr_acquire++; - seq->prev_event_time = timestamp; + seq->prev_event_time = sample->time; end: return; } static void report_lock_acquired_event(struct trace_acquired_event *acquired_event, - struct event_format *__event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + const struct perf_sample *sample) { + u64 timestamp = sample->time; struct lock_stat *ls; struct thread_stat *ts; struct lock_seq_stat *seq; @@ -494,7 +478,7 @@ report_lock_acquired_event(struct trace_acquired_event *acquired_event, if (ls->discard) return; - ts = thread_stat_findnew(thread->pid); + ts = thread_stat_findnew(sample->tid); seq = get_seq(ts, acquired_event->addr); switch (seq->state) { @@ -536,10 +520,7 @@ end: static void report_lock_contended_event(struct trace_contended_event *contended_event, - struct event_format *__event 
__used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + const struct perf_sample *sample) { struct lock_stat *ls; struct thread_stat *ts; @@ -549,7 +530,7 @@ report_lock_contended_event(struct trace_contended_event *contended_event, if (ls->discard) return; - ts = thread_stat_findnew(thread->pid); + ts = thread_stat_findnew(sample->tid); seq = get_seq(ts, contended_event->addr); switch (seq->state) { @@ -576,17 +557,14 @@ report_lock_contended_event(struct trace_contended_event *contended_event, seq->state = SEQ_STATE_CONTENDED; ls->nr_contended++; - seq->prev_event_time = timestamp; + seq->prev_event_time = sample->time; end: return; } static void report_lock_release_event(struct trace_release_event *release_event, - struct event_format *__event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + const struct perf_sample *sample) { struct lock_stat *ls; struct thread_stat *ts; @@ -596,7 +574,7 @@ report_lock_release_event(struct trace_release_event *release_event, if (ls->discard) return; - ts = thread_stat_findnew(thread->pid); + ts = thread_stat_findnew(sample->tid); seq = get_seq(ts, release_event->addr); switch (seq->state) { @@ -645,14 +623,12 @@ static struct trace_lock_handler report_lock_ops = { static struct trace_lock_handler *trace_handler; -static void -process_lock_acquire_event(void *data, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static void perf_evsel__process_lock_acquire(struct perf_evsel *evsel, + struct perf_sample *sample) { struct trace_acquire_event acquire_event; + struct event_format *event = evsel->tp_format; + void *data = sample->raw_data; u64 tmp; /* this is required for casting... 
*/ tmp = raw_field_value(event, "lockdep_addr", data); @@ -661,17 +637,15 @@ process_lock_acquire_event(void *data, acquire_event.flag = (int)raw_field_value(event, "flag", data); if (trace_handler->acquire_event) - trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread); + trace_handler->acquire_event(&acquire_event, sample); } -static void -process_lock_acquired_event(void *data, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static void perf_evsel__process_lock_acquired(struct perf_evsel *evsel, + struct perf_sample *sample) { struct trace_acquired_event acquired_event; + struct event_format *event = evsel->tp_format; + void *data = sample->raw_data; u64 tmp; /* this is required for casting... */ tmp = raw_field_value(event, "lockdep_addr", data); @@ -679,17 +653,15 @@ process_lock_acquired_event(void *data, acquired_event.name = (char *)raw_field_ptr(event, "name", data); if (trace_handler->acquire_event) - trace_handler->acquired_event(&acquired_event, event, cpu, timestamp, thread); + trace_handler->acquired_event(&acquired_event, sample); } -static void -process_lock_contended_event(void *data, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static void perf_evsel__process_lock_contended(struct perf_evsel *evsel, + struct perf_sample *sample) { struct trace_contended_event contended_event; + struct event_format *event = evsel->tp_format; + void *data = sample->raw_data; u64 tmp; /* this is required for casting... 
*/ tmp = raw_field_value(event, "lockdep_addr", data); @@ -697,17 +669,15 @@ process_lock_contended_event(void *data, contended_event.name = (char *)raw_field_ptr(event, "name", data); if (trace_handler->acquire_event) - trace_handler->contended_event(&contended_event, event, cpu, timestamp, thread); + trace_handler->contended_event(&contended_event, sample); } -static void -process_lock_release_event(void *data, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) +static void perf_evsel__process_lock_release(struct perf_evsel *evsel, + struct perf_sample *sample) { struct trace_release_event release_event; + struct event_format *event = evsel->tp_format; + void *data = sample->raw_data; u64 tmp; /* this is required for casting... */ tmp = raw_field_value(event, "lockdep_addr", data); @@ -715,26 +685,22 @@ process_lock_release_event(void *data, release_event.name = (char *)raw_field_ptr(event, "name", data); if (trace_handler->acquire_event) - trace_handler->release_event(&release_event, event, cpu, timestamp, thread); + trace_handler->release_event(&release_event, sample); } -static void -process_raw_event(void *data, int cpu, u64 timestamp, struct thread *thread) +static void perf_evsel__process_lock_event(struct perf_evsel *evsel, + struct perf_sample *sample) { - struct event_format *event; - int type; - - type = trace_parse_common_type(session->pevent, data); - event = pevent_find_event(session->pevent, type); + struct event_format *event = evsel->tp_format; if (!strcmp(event->name, "lock_acquire")) - process_lock_acquire_event(data, event, cpu, timestamp, thread); + perf_evsel__process_lock_acquire(evsel, sample); if (!strcmp(event->name, "lock_acquired")) - process_lock_acquired_event(data, event, cpu, timestamp, thread); + perf_evsel__process_lock_acquired(evsel, sample); if (!strcmp(event->name, "lock_contended")) - process_lock_contended_event(data, event, cpu, timestamp, thread); + 
perf_evsel__process_lock_contended(evsel, sample); if (!strcmp(event->name, "lock_release")) - process_lock_release_event(data, event, cpu, timestamp, thread); + perf_evsel__process_lock_release(evsel, sample); } static void print_bad_events(int bad, int total) @@ -849,7 +815,7 @@ static void dump_info(void) static int process_sample_event(struct perf_tool *tool __used, union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __used, + struct perf_evsel *evsel, struct machine *machine) { struct thread *thread = machine__findnew_thread(machine, sample->tid); @@ -860,8 +826,7 @@ static int process_sample_event(struct perf_tool *tool __used, return -1; } - process_raw_event(sample->raw_data, sample->cpu, sample->time, thread); - + perf_evsel__process_lock_event(evsel, sample); return 0; } @@ -938,16 +903,19 @@ static const struct option lock_options[] = { OPT_END() }; +static const char * const lock_tracepoints[] = { + "lock:lock_acquire", /* CONFIG_LOCKDEP */ + "lock:lock_acquired", /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + "lock:lock_contended", /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + "lock:lock_release", /* CONFIG_LOCKDEP */ +}; + static const char *record_args[] = { "record", "-R", "-f", "-m", "1024", "-c", "1", - "-e", "lock:lock_acquire", - "-e", "lock:lock_acquired", - "-e", "lock:lock_contended", - "-e", "lock:lock_release", }; static int __cmd_record(int argc, const char **argv) @@ -955,15 +923,31 @@ static int __cmd_record(int argc, const char **argv) unsigned int rec_argc, i, j; const char **rec_argv; + for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) { + if (!is_valid_tracepoint(lock_tracepoints[i])) { + pr_err("tracepoint %s is not enabled. 
" + "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n", + lock_tracepoints[i]); + return 1; + } + } + rec_argc = ARRAY_SIZE(record_args) + argc - 1; - rec_argv = calloc(rec_argc + 1, sizeof(char *)); + /* factor of 2 is for -e in front of each tracepoint */ + rec_argc += 2 * ARRAY_SIZE(lock_tracepoints); + rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) return -ENOMEM; for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); + for (j = 0; j < ARRAY_SIZE(lock_tracepoints); j++) { + rec_argv[i++] = "-e"; + rec_argv[i++] = strdup(lock_tracepoints[j]); + } + for (j = 1; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 4db6e1ba54e3..479ff2a038fc 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -31,6 +31,15 @@ #include <sched.h> #include <sys/mman.h> +#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: " + +#ifdef NO_LIBUNWIND_SUPPORT +static char callchain_help[] = CALLCHAIN_HELP "[fp]"; +#else +static unsigned long default_stack_dump_size = 8192; +static char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf"; +#endif + enum write_mode_t { WRITE_FORCE, WRITE_APPEND @@ -163,12 +172,12 @@ static bool perf_evlist__equal(struct perf_evlist *evlist, if (evlist->nr_entries != other->nr_entries) return false; - pair = list_entry(other->entries.next, struct perf_evsel, node); + pair = perf_evlist__first(other); list_for_each_entry(pos, &evlist->entries, node) { if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0)) return false; - pair = list_entry(pair->node.next, struct perf_evsel, node); + pair = perf_evsel__next(pair); } return true; @@ -176,18 +185,18 @@ static bool perf_evlist__equal(struct perf_evlist *evlist, static void perf_record__open(struct perf_record *rec) { - struct perf_evsel *pos, *first; + struct perf_evsel *pos; struct perf_evlist *evlist = rec->evlist; struct 
perf_session *session = rec->session; struct perf_record_opts *opts = &rec->opts; - first = list_entry(evlist->entries.next, struct perf_evsel, node); - perf_evlist__config_attrs(evlist, opts); + if (opts->group) + perf_evlist__set_leader(evlist); + list_for_each_entry(pos, &evlist->entries, node) { struct perf_event_attr *attr = &pos->attr; - struct xyarray *group_fd = NULL; /* * Check if parse_single_tracepoint_event has already asked for * PERF_SAMPLE_TIME. @@ -202,16 +211,13 @@ static void perf_record__open(struct perf_record *rec) */ bool time_needed = attr->sample_type & PERF_SAMPLE_TIME; - if (opts->group && pos != first) - group_fd = first->fd; fallback_missing_features: if (opts->exclude_guest_missing) attr->exclude_guest = attr->exclude_host = 0; retry_sample_id: attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1; try_again: - if (perf_evsel__open(pos, evlist->cpus, evlist->threads, - opts->group, group_fd) < 0) { + if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) { int err = errno; if (err == EPERM || err == EACCES) { @@ -732,6 +738,106 @@ error: return ret; } +#ifndef NO_LIBUNWIND_SUPPORT +static int get_stack_size(char *str, unsigned long *_size) +{ + char *endptr; + unsigned long size; + unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); + + size = strtoul(str, &endptr, 0); + + do { + if (*endptr) + break; + + size = round_up(size, sizeof(u64)); + if (!size || size > max_size) + break; + + *_size = size; + return 0; + + } while (0); + + pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", + max_size, str); + return -1; +} +#endif /* !NO_LIBUNWIND_SUPPORT */ + +static int +parse_callchain_opt(const struct option *opt __used, const char *arg, + int unset) +{ + struct perf_record *rec = (struct perf_record *)opt->value; + char *tok, *name, *saveptr = NULL; + char *buf; + int ret = -1; + + /* --no-call-graph */ + if (unset) + return 0; + + /* We specified default option if none is provided. 
*/ + BUG_ON(!arg); + + /* We need buffer that we know we can write to. */ + buf = malloc(strlen(arg) + 1); + if (!buf) + return -ENOMEM; + + strcpy(buf, arg); + + tok = strtok_r((char *)buf, ",", &saveptr); + name = tok ? : (char *)buf; + + do { + /* Framepointer style */ + if (!strncmp(name, "fp", sizeof("fp"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + rec->opts.call_graph = CALLCHAIN_FP; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for -g fp\n"); + break; + +#ifndef NO_LIBUNWIND_SUPPORT + /* Dwarf style */ + } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { + ret = 0; + rec->opts.call_graph = CALLCHAIN_DWARF; + rec->opts.stack_dump_size = default_stack_dump_size; + + tok = strtok_r(NULL, ",", &saveptr); + if (tok) { + unsigned long size = 0; + + ret = get_stack_size(tok, &size); + rec->opts.stack_dump_size = size; + } + + if (!ret) + pr_debug("callchain: stack dump size %d\n", + rec->opts.stack_dump_size); +#endif /* !NO_LIBUNWIND_SUPPORT */ + } else { + pr_err("callchain: Unknown -g option " + "value: %s\n", arg); + break; + } + + } while (0); + + free(buf); + + if (!ret) + pr_debug("callchain: type %d\n", rec->opts.call_graph); + + return ret; +} + static const char * const record_usage[] = { "perf record [<options>] [<command>]", "perf record [<options>] -- <command> [<options>]", @@ -803,8 +909,9 @@ const struct option record_options[] = { "number of mmap data pages"), OPT_BOOLEAN(0, "group", &record.opts.group, "put the counters into a counter group"), - OPT_BOOLEAN('g', "call-graph", &record.opts.call_graph, - "do call-graph (stack chain/backtrace) recording"), + OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]", + callchain_help, &parse_callchain_opt, + "fp"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 
7c88a243b5db..d61825371adc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -69,8 +69,8 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = machine__resolve_callchain(machine, al->thread, - sample->callchain, &parent); + err = machine__resolve_callchain(machine, evsel, al->thread, + sample, &parent); if (err) return err; } @@ -140,8 +140,8 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, struct hist_entry *he; if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = machine__resolve_callchain(machine, al->thread, - sample->callchain, &parent); + err = machine__resolve_callchain(machine, evsel, al->thread, + sample, &parent); if (err) return err; } @@ -397,17 +397,17 @@ static int __cmd_report(struct perf_report *rep) desc); } - if (dump_trace) { - perf_session__fprintf_nr_events(session, stdout); - goto out_delete; - } - if (verbose > 3) perf_session__fprintf(session, stdout); if (verbose > 2) perf_session__fprintf_dsos(session, stdout); + if (dump_trace) { + perf_session__fprintf_nr_events(session, stdout); + goto out_delete; + } + nr_samples = 0; list_for_each_entry(pos, &session->evlist->entries, node) { struct hists *hists = &pos->hists; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 7a9ad2b1ee76..a25a023965bb 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -43,11 +43,6 @@ static u64 sleep_measurement_overhead; static unsigned long nr_tasks; -struct perf_sched { - struct perf_tool tool; - struct perf_session *session; -}; - struct sched_atom; struct task_desc { @@ -734,46 +729,30 @@ struct trace_sched_handler { void (*switch_event)(struct trace_switch_event *, struct machine *, struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + struct perf_sample *sample); void (*runtime_event)(struct 
trace_runtime_event *, struct machine *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + struct perf_sample *sample); void (*wakeup_event)(struct trace_wakeup_event *, struct machine *, struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + struct perf_sample *sample); void (*fork_event)(struct trace_fork_event *, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + struct event_format *event); void (*migrate_task_event)(struct trace_migrate_task_event *, - struct machine *machine, - struct event_format *, - int cpu, - u64 timestamp, - struct thread *thread); + struct machine *machine, + struct perf_sample *sample); }; static void replay_wakeup_event(struct trace_wakeup_event *wakeup_event, struct machine *machine __used, - struct event_format *event, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + struct event_format *event, struct perf_sample *sample) { struct task_desc *waker, *wakee; @@ -789,7 +768,7 @@ replay_wakeup_event(struct trace_wakeup_event *wakeup_event, waker = register_pid(wakeup_event->common_pid, "<unknown>"); wakee = register_pid(wakeup_event->pid, wakeup_event->comm); - add_sched_event_wakeup(waker, timestamp, wakee); + add_sched_event_wakeup(waker, sample->time, wakee); } static u64 cpu_last_switched[MAX_CPUS]; @@ -798,12 +777,11 @@ static void replay_switch_event(struct trace_switch_event *switch_event, struct machine *machine __used, struct event_format *event, - int cpu, - u64 timestamp, - struct thread *thread __used) + struct perf_sample *sample) { struct task_desc *prev, __used *next; - u64 timestamp0; + u64 timestamp0, timestamp = sample->time; + int cpu = sample->cpu; s64 delta; if (verbose) @@ -840,10 +818,7 @@ replay_switch_event(struct trace_switch_event *switch_event, static void replay_fork_event(struct trace_fork_event *fork_event, - struct event_format *event, - int cpu __used, - u64 timestamp __used, - struct thread 
*thread __used) + struct event_format *event) { if (verbose) { printf("sched_fork event %p\n", event); @@ -949,10 +924,7 @@ static void thread_atoms_insert(struct thread *thread) static void latency_fork_event(struct trace_fork_event *fork_event __used, - struct event_format *event __used, - int cpu __used, - u64 timestamp __used, - struct thread *thread __used) + struct event_format *event __used) { /* should insert the newcomer */ } @@ -1032,13 +1004,12 @@ static void latency_switch_event(struct trace_switch_event *switch_event, struct machine *machine, struct event_format *event __used, - int cpu, - u64 timestamp, - struct thread *thread __used) + struct perf_sample *sample) { struct work_atoms *out_events, *in_events; struct thread *sched_out, *sched_in; - u64 timestamp0; + u64 timestamp0, timestamp = sample->time; + int cpu = sample->cpu; s64 delta; BUG_ON(cpu >= MAX_CPUS || cpu < 0); @@ -1083,14 +1054,12 @@ latency_switch_event(struct trace_switch_event *switch_event, static void latency_runtime_event(struct trace_runtime_event *runtime_event, - struct machine *machine, - struct event_format *event __used, - int cpu, - u64 timestamp, - struct thread *this_thread __used) + struct machine *machine, struct perf_sample *sample) { struct thread *thread = machine__findnew_thread(machine, runtime_event->pid); struct work_atoms *atoms = thread_atoms_search(&atom_root, thread, &cmp_pid); + u64 timestamp = sample->time; + int cpu = sample->cpu; BUG_ON(cpu >= MAX_CPUS || cpu < 0); if (!atoms) { @@ -1106,15 +1075,13 @@ latency_runtime_event(struct trace_runtime_event *runtime_event, static void latency_wakeup_event(struct trace_wakeup_event *wakeup_event, - struct machine *machine, - struct event_format *__event __used, - int cpu __used, - u64 timestamp, - struct thread *thread __used) + struct machine *machine, struct event_format *event __used, + struct perf_sample *sample) { struct work_atoms *atoms; struct work_atom *atom; struct thread *wakee; + u64 timestamp = 
sample->time; /* Note for later, it may be interesting to observe the failing cases */ if (!wakeup_event->success) @@ -1154,12 +1121,9 @@ latency_wakeup_event(struct trace_wakeup_event *wakeup_event, static void latency_migrate_task_event(struct trace_migrate_task_event *migrate_task_event, - struct machine *machine, - struct event_format *__event __used, - int cpu __used, - u64 timestamp, - struct thread *thread __used) + struct machine *machine, struct perf_sample *sample) { + u64 timestamp = sample->time; struct work_atoms *atoms; struct work_atom *atom; struct thread *migrant; @@ -1369,7 +1333,7 @@ process_sched_wakeup_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, struct machine *machine, - struct thread *thread) + struct thread *thread __used) { void *data = sample->raw_data; struct trace_wakeup_event wakeup_event; @@ -1383,8 +1347,7 @@ process_sched_wakeup_event(struct perf_tool *tool __used, FILL_FIELD(wakeup_event, cpu, event, data); if (trace_handler->wakeup_event) - trace_handler->wakeup_event(&wakeup_event, machine, event, - sample->cpu, sample->time, thread); + trace_handler->wakeup_event(&wakeup_event, machine, event, sample); } /* @@ -1404,15 +1367,13 @@ static void map_switch_event(struct trace_switch_event *switch_event, struct machine *machine, struct event_format *event __used, - int this_cpu, - u64 timestamp, - struct thread *thread __used) + struct perf_sample *sample) { struct thread *sched_out __used, *sched_in; int new_shortname; - u64 timestamp0; + u64 timestamp0, timestamp = sample->time; s64 delta; - int cpu; + int cpu, this_cpu = sample->cpu; BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0); @@ -1484,7 +1445,7 @@ process_sched_switch_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, struct machine *machine, - struct thread *thread) + struct thread *thread __used) { int this_cpu = sample->cpu; void *data = sample->raw_data; @@ -1509,8 +1470,7 @@ 
process_sched_switch_event(struct perf_tool *tool __used, nr_context_switch_bugs++; } if (trace_handler->switch_event) - trace_handler->switch_event(&switch_event, machine, event, - this_cpu, sample->time, thread); + trace_handler->switch_event(&switch_event, machine, event, sample); curr_pid[this_cpu] = switch_event.next_pid; } @@ -1520,7 +1480,7 @@ process_sched_runtime_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, struct machine *machine, - struct thread *thread) + struct thread *thread __used) { void *data = sample->raw_data; struct trace_runtime_event runtime_event; @@ -1531,8 +1491,7 @@ process_sched_runtime_event(struct perf_tool *tool __used, FILL_FIELD(runtime_event, vruntime, event, data); if (trace_handler->runtime_event) - trace_handler->runtime_event(&runtime_event, machine, event, - sample->cpu, sample->time, thread); + trace_handler->runtime_event(&runtime_event, machine, sample); } static void @@ -1540,7 +1499,7 @@ process_sched_fork_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, struct machine *machine __used, - struct thread *thread) + struct thread *thread __used) { void *data = sample->raw_data; struct trace_fork_event fork_event; @@ -1553,8 +1512,7 @@ process_sched_fork_event(struct perf_tool *tool __used, FILL_FIELD(fork_event, child_pid, event, data); if (trace_handler->fork_event) - trace_handler->fork_event(&fork_event, event, - sample->cpu, sample->time, thread); + trace_handler->fork_event(&fork_event, event); } static void @@ -1573,7 +1531,7 @@ process_sched_migrate_task_event(struct perf_tool *tool __used, struct event_format *event, struct perf_sample *sample, struct machine *machine, - struct thread *thread) + struct thread *thread __used) { void *data = sample->raw_data; struct trace_migrate_task_event migrate_task_event; @@ -1586,9 +1544,7 @@ process_sched_migrate_task_event(struct perf_tool *tool __used, FILL_FIELD(migrate_task_event, cpu, 
event, data); if (trace_handler->migrate_task_event) - trace_handler->migrate_task_event(&migrate_task_event, machine, - event, sample->cpu, - sample->time, thread); + trace_handler->migrate_task_event(&migrate_task_event, machine, sample); } typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event_format *event, @@ -1596,14 +1552,12 @@ typedef void (*tracepoint_handler)(struct perf_tool *tool, struct event_format * struct machine *machine, struct thread *thread); -static int perf_sched__process_tracepoint_sample(struct perf_tool *tool, +static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __used, union perf_event *event __used, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine) { - struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - struct pevent *pevent = sched->session->pevent; struct thread *thread = machine__findnew_thread(machine, sample->pid); if (thread == NULL) { @@ -1617,25 +1571,18 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool, if (evsel->handler.func != NULL) { tracepoint_handler f = evsel->handler.func; - - if (evsel->handler.data == NULL) - evsel->handler.data = pevent_find_event(pevent, - evsel->attr.config); - - f(tool, evsel->handler.data, sample, machine, thread); + f(tool, evsel->tp_format, sample, machine, thread); } return 0; } -static struct perf_sched perf_sched = { - .tool = { - .sample = perf_sched__process_tracepoint_sample, - .comm = perf_event__process_comm, - .lost = perf_event__process_lost, - .fork = perf_event__process_task, - .ordered_samples = true, - }, +static struct perf_tool perf_sched = { + .sample = perf_sched__process_tracepoint_sample, + .comm = perf_event__process_comm, + .lost = perf_event__process_lost, + .fork = perf_event__process_task, + .ordered_samples = true, }; static void read_events(bool destroy, struct perf_session **psession) @@ -1652,18 +1599,15 @@ static void read_events(bool destroy, struct 
perf_session **psession) }; struct perf_session *session; - session = perf_session__new(input_name, O_RDONLY, 0, false, - &perf_sched.tool); + session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_sched); if (session == NULL) die("No Memory"); - perf_sched.session = session; - err = perf_session__set_tracepoints_handlers(session, handlers); assert(err == 0); if (perf_session__has_traces(session, "record -R")) { - err = perf_session__process_events(session, &perf_sched.tool); + err = perf_session__process_events(session, &perf_sched); if (err) die("Failed to process events, error %d", err); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 1e60ab70b2b1..2d6e3b226aad 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -28,11 +28,6 @@ static bool system_wide; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); -struct perf_script { - struct perf_tool tool; - struct perf_session *session; -}; - enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, PERF_OUTPUT_TID = 1U << 1, @@ -262,14 +257,11 @@ static int perf_session__check_output_opt(struct perf_session *session) return 0; } -static void print_sample_start(struct pevent *pevent, - struct perf_sample *sample, +static void print_sample_start(struct perf_sample *sample, struct thread *thread, struct perf_evsel *evsel) { - int type; struct perf_event_attr *attr = &evsel->attr; - struct event_format *event; const char *evname = NULL; unsigned long secs; unsigned long usecs; @@ -307,20 +299,7 @@ static void print_sample_start(struct pevent *pevent, } if (PRINT_FIELD(EVNAME)) { - if (attr->type == PERF_TYPE_TRACEPOINT) { - /* - * XXX Do we really need this here? 
- * perf_evlist__set_tracepoint_names should have done - * this already - */ - type = trace_parse_common_type(pevent, - sample->raw_data); - event = pevent_find_event(pevent, type); - if (event) - evname = event->name; - } else - evname = perf_evsel__name(evsel); - + evname = perf_evsel__name(evsel); printf("%s: ", evname ? evname : "[unknown]"); } } @@ -401,7 +380,7 @@ static void print_sample_bts(union perf_event *event, printf(" "); else printf("\n"); - perf_event__print_ip(event, sample, machine, + perf_evsel__print_ip(evsel, event, sample, machine, PRINT_FIELD(SYM), PRINT_FIELD(DSO), PRINT_FIELD(SYMOFFSET)); } @@ -415,19 +394,17 @@ static void print_sample_bts(union perf_event *event, printf("\n"); } -static void process_event(union perf_event *event __unused, - struct pevent *pevent, - struct perf_sample *sample, - struct perf_evsel *evsel, - struct machine *machine, - struct thread *thread) +static void process_event(union perf_event *event, struct perf_sample *sample, + struct perf_evsel *evsel, struct machine *machine, + struct addr_location *al) { struct perf_event_attr *attr = &evsel->attr; + struct thread *thread = al->thread; if (output[attr->type].fields == 0) return; - print_sample_start(pevent, sample, thread, evsel); + print_sample_start(sample, thread, evsel); if (is_bts_event(attr)) { print_sample_bts(event, sample, evsel, machine, thread); @@ -435,9 +412,8 @@ static void process_event(union perf_event *event __unused, } if (PRINT_FIELD(TRACE)) - print_trace_event(pevent, sample->cpu, sample->raw_data, - sample->raw_size); - + event_format__print(evsel->tp_format, sample->cpu, + sample->raw_data, sample->raw_size); if (PRINT_FIELD(ADDR)) print_sample_addr(event, sample, machine, thread, attr); @@ -446,7 +422,7 @@ static void process_event(union perf_event *event __unused, printf(" "); else printf("\n"); - perf_event__print_ip(event, sample, machine, + perf_evsel__print_ip(evsel, event, sample, machine, PRINT_FIELD(SYM), PRINT_FIELD(DSO), 
PRINT_FIELD(SYMOFFSET)); } @@ -505,7 +481,6 @@ static int process_sample_event(struct perf_tool *tool __used, struct machine *machine) { struct addr_location al; - struct perf_script *scr = container_of(tool, struct perf_script, tool); struct thread *thread = machine__findnew_thread(machine, event->ip.tid); if (thread == NULL) { @@ -537,27 +512,24 @@ static int process_sample_event(struct perf_tool *tool __used, if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) return 0; - scripting_ops->process_event(event, scr->session->pevent, - sample, evsel, machine, thread); + scripting_ops->process_event(event, sample, evsel, machine, &al); evsel->hists.stats.total_period += sample->period; return 0; } -static struct perf_script perf_script = { - .tool = { - .sample = process_sample_event, - .mmap = perf_event__process_mmap, - .comm = perf_event__process_comm, - .exit = perf_event__process_task, - .fork = perf_event__process_task, - .attr = perf_event__process_attr, - .event_type = perf_event__process_event_type, - .tracing_data = perf_event__process_tracing_data, - .build_id = perf_event__process_build_id, - .ordered_samples = true, - .ordering_requires_timestamps = true, - }, +static struct perf_tool perf_script = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .comm = perf_event__process_comm, + .exit = perf_event__process_task, + .fork = perf_event__process_task, + .attr = perf_event__process_attr, + .event_type = perf_event__process_event_type, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .ordered_samples = true, + .ordering_requires_timestamps = true, }; extern volatile int session_done; @@ -573,7 +545,7 @@ static int __cmd_script(struct perf_session *session) signal(SIGINT, sig_handler); - ret = perf_session__process_events(session, &perf_script.tool); + ret = perf_session__process_events(session, &perf_script); if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); 
@@ -1356,12 +1328,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __used) setup_pager(); session = perf_session__new(input_name, O_RDONLY, 0, false, - &perf_script.tool); + &perf_script); if (session == NULL) return -ENOMEM; - perf_script.session = session; - if (cpu_list) { if (perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap)) return -1; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 861f0aec77ae..d53d8ab099b1 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -281,13 +281,9 @@ static int create_perf_stat_counter(struct perf_evsel *evsel, struct perf_evsel *first) { struct perf_event_attr *attr = &evsel->attr; - struct xyarray *group_fd = NULL; bool exclude_guest_missing = false; int ret; - if (group && evsel != first) - group_fd = first->fd; - if (scale) attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; @@ -299,8 +295,7 @@ retry: evsel->attr.exclude_guest = evsel->attr.exclude_host = 0; if (perf_target__has_cpu(&target)) { - ret = perf_evsel__open_per_cpu(evsel, evsel_list->cpus, - group, group_fd); + ret = perf_evsel__open_per_cpu(evsel, evsel_list->cpus); if (ret) goto check_ret; return 0; @@ -311,8 +306,7 @@ retry: attr->enable_on_exec = 1; } - ret = perf_evsel__open_per_thread(evsel, evsel_list->threads, - group, group_fd); + ret = perf_evsel__open_per_thread(evsel, evsel_list->threads); if (!ret) return 0; /* fall through */ @@ -483,7 +477,10 @@ static int run_perf_stat(int argc __used, const char **argv) close(child_ready_pipe[0]); } - first = list_entry(evsel_list->entries.next, struct perf_evsel, node); + if (group) + perf_evlist__set_leader(evsel_list); + + first = perf_evlist__first(evsel_list); list_for_each_entry(counter, &evsel_list->entries, node) { if (create_perf_stat_counter(counter, first) < 0) { diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 1d592f5cbea9..381d5ab87124 100644 --- a/tools/perf/builtin-test.c 
+++ b/tools/perf/builtin-test.c @@ -294,7 +294,7 @@ static int test__open_syscall_event(void) goto out_thread_map_delete; } - if (perf_evsel__open_per_thread(evsel, threads, false, NULL) < 0) { + if (perf_evsel__open_per_thread(evsel, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -369,7 +369,7 @@ static int test__open_syscall_event_on_all_cpus(void) goto out_thread_map_delete; } - if (perf_evsel__open(evsel, cpus, threads, false, NULL) < 0) { + if (perf_evsel__open(evsel, cpus, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -533,7 +533,7 @@ static int test__basic_mmap(void) perf_evlist__add(evlist, evsels[i]); - if (perf_evsel__open(evsels[i], cpus, threads, false, NULL) < 0) { + if (perf_evsel__open(evsels[i], cpus, threads) < 0) { pr_debug("failed to open counter: %s, " "tweak /proc/sys/kernel/perf_event_paranoid?\n", strerror(errno)); @@ -710,7 +710,7 @@ static int test__PERF_RECORD(void) /* * Config the evsels, setting attr->comm on the first one, etc. */ - evsel = list_entry(evlist->entries.next, struct perf_evsel, node); + evsel = perf_evlist__first(evlist); evsel->attr.sample_type |= PERF_SAMPLE_CPU; evsel->attr.sample_type |= PERF_SAMPLE_TID; evsel->attr.sample_type |= PERF_SAMPLE_TIME; @@ -737,7 +737,7 @@ static int test__PERF_RECORD(void) * Call sys_perf_event_open on all the fds on all the evsels, * grouping them if asked to. 
*/ - err = perf_evlist__open(evlist, opts.group); + err = perf_evlist__open(evlist); if (err < 0) { pr_debug("perf_evlist__open: %s\n", strerror(errno)); goto out_delete_evlist; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 68cd61ef6ac5..0513aaa659f9 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -509,7 +509,7 @@ static void perf_top__handle_keypress(struct perf_top *top, int c) prompt_integer(&counter, "Enter details event counter"); if (counter >= top->evlist->nr_entries) { - top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node); + top->sym_evsel = perf_evlist__first(top->evlist); fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel)); sleep(1); break; @@ -518,7 +518,7 @@ static void perf_top__handle_keypress(struct perf_top *top, int c) if (top->sym_evsel->idx == counter) break; } else - top->sym_evsel = list_entry(top->evlist->entries.next, struct perf_evsel, node); + top->sym_evsel = perf_evlist__first(top->evlist); break; case 'f': prompt_integer(&top->count_filter, "Enter display event count filter"); @@ -783,8 +783,10 @@ static void perf_event__process_sample(struct perf_tool *tool, if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) { - err = machine__resolve_callchain(machine, al.thread, - sample->callchain, &parent); + err = machine__resolve_callchain(machine, evsel, + al.thread, sample, + &parent); + if (err) return; } @@ -884,17 +886,14 @@ static void perf_top__mmap_read(struct perf_top *top) static void perf_top__start_counters(struct perf_top *top) { - struct perf_evsel *counter, *first; + struct perf_evsel *counter; struct perf_evlist *evlist = top->evlist; - first = list_entry(evlist->entries.next, struct perf_evsel, node); + if (top->group) + perf_evlist__set_leader(evlist); list_for_each_entry(counter, &evlist->entries, node) { struct perf_event_attr *attr = &counter->attr; - struct xyarray *group_fd = NULL; - - if 
(top->group && counter != first) - group_fd = first->fd; attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; @@ -925,8 +924,7 @@ retry_sample_id: attr->sample_id_all = top->sample_id_all_missing ? 0 : 1; try_again: if (perf_evsel__open(counter, top->evlist->cpus, - top->evlist->threads, top->group, - group_fd) < 0) { + top->evlist->threads) < 0) { int err = errno; if (err == EPERM || err == EACCES) { @@ -1328,7 +1326,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) pos->attr.sample_period = top.default_interval; } - top.sym_evsel = list_entry(top.evlist->entries.next, struct perf_evsel, node); + top.sym_evsel = perf_evlist__first(top.evlist); symbol_conf.priv_size = sizeof(struct annotation); diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index d695fe40fbff..0303ec692274 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -18,7 +18,7 @@ perf-stat mainporcelain common perf-timechart mainporcelain common perf-top mainporcelain common perf-script mainporcelain common -perf-probe mainporcelain common +perf-probe mainporcelain full perf-kmem mainporcelain common perf-lock mainporcelain common perf-kvm mainporcelain common diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index 6c18785a6417..2f1156a62ab7 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak @@ -154,3 +154,28 @@ int main(void) return 0; } endef + +ifndef NO_LIBUNWIND +define SOURCE_LIBUNWIND +#include <libunwind.h> +#include <stdlib.h> + +extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +int main(void) +{ + unw_addr_space_t addr_space; + addr_space = unw_create_addr_space(NULL, 0); + unw_init_remote(NULL, addr_space, NULL); + dwarf_search_unwind_table(addr_space, 0, 
NULL, NULL, 0, NULL); + return 0; +} +endef +endif diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 2b2e225a4d4c..e7840e500715 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -24,6 +24,39 @@ const char perf_more_info_string[] = int use_browser = -1; static int use_pager = -1; +struct cmd_struct { + const char *cmd; + int (*fn)(int, const char **, const char *); + int option; +}; + +static struct cmd_struct commands[] = { + { "buildid-cache", cmd_buildid_cache, 0 }, + { "buildid-list", cmd_buildid_list, 0 }, + { "diff", cmd_diff, 0 }, + { "evlist", cmd_evlist, 0 }, + { "help", cmd_help, 0 }, + { "list", cmd_list, 0 }, + { "record", cmd_record, 0 }, + { "report", cmd_report, 0 }, + { "bench", cmd_bench, 0 }, + { "stat", cmd_stat, 0 }, + { "timechart", cmd_timechart, 0 }, + { "top", cmd_top, 0 }, + { "annotate", cmd_annotate, 0 }, + { "version", cmd_version, 0 }, + { "script", cmd_script, 0 }, + { "sched", cmd_sched, 0 }, +#ifndef NO_LIBELF_SUPPORT + { "probe", cmd_probe, 0 }, +#endif + { "kmem", cmd_kmem, 0 }, + { "lock", cmd_lock, 0 }, + { "kvm", cmd_kvm, 0 }, + { "test", cmd_test, 0 }, + { "inject", cmd_inject, 0 }, +}; + struct pager_config { const char *cmd; int val; @@ -160,6 +193,14 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) fprintf(stderr, "dir: %s\n", debugfs_mountpoint); if (envchanged) *envchanged = 1; + } else if (!strcmp(cmd, "--list-cmds")) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(commands); i++) { + struct cmd_struct *p = commands+i; + printf("%s ", p->cmd); + } + exit(0); } else { fprintf(stderr, "Unknown option: %s\n", cmd); usage(perf_usage_string); @@ -245,12 +286,6 @@ const char perf_version_string[] = PERF_VERSION; */ #define NEED_WORK_TREE (1<<2) -struct cmd_struct { - const char *cmd; - int (*fn)(int, const char **, const char *); - int option; -}; - static int run_builtin(struct cmd_struct *p, int argc, const char **argv) { int status; @@ -296,30 +331,6 @@ static int 
run_builtin(struct cmd_struct *p, int argc, const char **argv) static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; - static struct cmd_struct commands[] = { - { "buildid-cache", cmd_buildid_cache, 0 }, - { "buildid-list", cmd_buildid_list, 0 }, - { "diff", cmd_diff, 0 }, - { "evlist", cmd_evlist, 0 }, - { "help", cmd_help, 0 }, - { "list", cmd_list, 0 }, - { "record", cmd_record, 0 }, - { "report", cmd_report, 0 }, - { "bench", cmd_bench, 0 }, - { "stat", cmd_stat, 0 }, - { "timechart", cmd_timechart, 0 }, - { "top", cmd_top, 0 }, - { "annotate", cmd_annotate, 0 }, - { "version", cmd_version, 0 }, - { "script", cmd_script, 0 }, - { "sched", cmd_sched, 0 }, - { "probe", cmd_probe, 0 }, - { "kmem", cmd_kmem, 0 }, - { "lock", cmd_lock, 0 }, - { "kvm", cmd_kvm, 0 }, - { "test", cmd_test, 0 }, - { "inject", cmd_inject, 0 }, - }; unsigned int i; static const char ext[] = STRIP_EXTENSION; diff --git a/tools/perf/perf.h b/tools/perf/perf.h index f960ccb2edc6..87f4ec6d1f36 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -209,9 +209,15 @@ void pthread__unblock_sigwinch(void); #include "util/target.h" +enum perf_call_graph_mode { + CALLCHAIN_NONE, + CALLCHAIN_FP, + CALLCHAIN_DWARF +}; + struct perf_record_opts { struct perf_target target; - bool call_graph; + int call_graph; bool group; bool inherit_stat; bool no_delay; @@ -230,6 +236,7 @@ struct perf_record_opts { u64 branch_stack; u64 default_interval; u64 user_interval; + u16 stack_dump_size; }; #endif diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py new file mode 100755 index 000000000000..9e0985794e20 --- /dev/null +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py @@ -0,0 +1,94 @@ +# EventClass.py +# +# This is a library defining some events types classes, which could +# be used by other scripts to analyzing the perf samples. 
+# +# Currently there are just a few classes defined for examples, +# PerfEvent is the base class for all perf event sample, PebsEvent +# is a HW base Intel x86 PEBS event, and user could add more SW/HW +# event classes based on requirements. + +import struct + +# Event types, user could add more here +EVTYPE_GENERIC = 0 +EVTYPE_PEBS = 1 # Basic PEBS event +EVTYPE_PEBS_LL = 2 # PEBS event with load latency info +EVTYPE_IBS = 3 + +# +# Currently we don't have good way to tell the event type, but by +# the size of raw buffer, raw PEBS event with load latency data's +# size is 176 bytes, while the pure PEBS event's size is 144 bytes. +# +def create_event(name, comm, dso, symbol, raw_buf): + if (len(raw_buf) == 144): + event = PebsEvent(name, comm, dso, symbol, raw_buf) + elif (len(raw_buf) == 176): + event = PebsNHM(name, comm, dso, symbol, raw_buf) + else: + event = PerfEvent(name, comm, dso, symbol, raw_buf) + + return event + +class PerfEvent(object): + event_num = 0 + def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_GENERIC): + self.name = name + self.comm = comm + self.dso = dso + self.symbol = symbol + self.raw_buf = raw_buf + self.ev_type = ev_type + PerfEvent.event_num += 1 + + def show(self): + print "PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % (self.name, self.symbol, self.comm, self.dso) + +# +# Basic Intel PEBS (Precise Event-based Sampling) event, whose raw buffer +# contains the context info when that event happened: the EFLAGS and +# linear IP info, as well as all the registers. 
+# +class PebsEvent(PerfEvent): + pebs_num = 0 + def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_PEBS): + tmp_buf=raw_buf[0:80] + flags, ip, ax, bx, cx, dx, si, di, bp, sp = struct.unpack('QQQQQQQQQQ', tmp_buf) + self.flags = flags + self.ip = ip + self.ax = ax + self.bx = bx + self.cx = cx + self.dx = dx + self.si = si + self.di = di + self.bp = bp + self.sp = sp + + PerfEvent.__init__(self, name, comm, dso, symbol, raw_buf, ev_type) + PebsEvent.pebs_num += 1 + del tmp_buf + +# +# Intel Nehalem and Westmere support PEBS plus Load Latency info which lie +# in the four 64 bit words write after the PEBS data: +# Status: records the IA32_PERF_GLOBAL_STATUS register value +# DLA: Data Linear Address (EIP) +# DSE: Data Source Encoding, where the latency happens, hit or miss +# in L1/L2/L3 or IO operations +# LAT: the actual latency in cycles +# +class PebsNHM(PebsEvent): + pebs_nhm_num = 0 + def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_PEBS_LL): + tmp_buf=raw_buf[144:176] + status, dla, dse, lat = struct.unpack('QQQQ', tmp_buf) + self.status = status + self.dla = dla + self.dse = dse + self.lat = lat + + PebsEvent.__init__(self, name, comm, dso, symbol, raw_buf, ev_type) + PebsNHM.pebs_nhm_num += 1 + del tmp_buf diff --git a/tools/perf/scripts/python/event_analyzing_sample.py b/tools/perf/scripts/python/event_analyzing_sample.py new file mode 100644 index 000000000000..163c39fa12d9 --- /dev/null +++ b/tools/perf/scripts/python/event_analyzing_sample.py @@ -0,0 +1,189 @@ +# event_analyzing_sample.py: general event handler in python +# +# Current perf report is already very powerful with the annotation integrated, +# and this script is not trying to be as powerful as perf report, but +# providing end user/developer a flexible way to analyze the events other +# than trace points. 
+# +# The 2 database related functions in this script just show how to gather +# the basic information, and users can modify and write their own functions +# according to their specific requirement. +# +# The first function "show_general_events" just does a basic grouping for all +# generic events with the help of sqlite, and the 2nd one "show_pebs_ll" is +# for a x86 HW PMU event: PEBS with load latency data. +# + +import os +import sys +import math +import struct +import sqlite3 + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from EventClass import * + +# +# If the perf.data has a big number of samples, then the insert operation +# will be very time consuming (about 10+ minutes for 10000 samples) if the +# .db database is on disk. Move the .db file to RAM based FS to speedup +# the handling, which will cut the time down to several seconds. +# +con = sqlite3.connect("/dev/shm/perf.db") +con.isolation_level = None + +def trace_begin(): + print "In trace_begin:\n" + + # + # Will create several tables at the start, pebs_ll is for PEBS data with + # load latency info, while gen_events is for general event. + # + con.execute(""" + create table if not exists gen_events ( + name text, + symbol text, + comm text, + dso text + );""") + con.execute(""" + create table if not exists pebs_ll ( + name text, + symbol text, + comm text, + dso text, + flags integer, + ip integer, + status integer, + dse integer, + dla integer, + lat integer + );""") + +# +# Create and insert event object to a database so that user could +# do more analysis with simple database commands. 
+# +def process_event(param_dict): + event_attr = param_dict["attr"] + sample = param_dict["sample"] + raw_buf = param_dict["raw_buf"] + comm = param_dict["comm"] + name = param_dict["ev_name"] + + # Symbol and dso info are not always resolved + if (param_dict.has_key("dso")): + dso = param_dict["dso"] + else: + dso = "Unknown_dso" + + if (param_dict.has_key("symbol")): + symbol = param_dict["symbol"] + else: + symbol = "Unknown_symbol" + + # Create the event object and insert it to the right table in database + event = create_event(name, comm, dso, symbol, raw_buf) + insert_db(event) + +def insert_db(event): + if event.ev_type == EVTYPE_GENERIC: + con.execute("insert into gen_events values(?, ?, ?, ?)", + (event.name, event.symbol, event.comm, event.dso)) + elif event.ev_type == EVTYPE_PEBS_LL: + event.ip &= 0x7fffffffffffffff + event.dla &= 0x7fffffffffffffff + con.execute("insert into pebs_ll values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + (event.name, event.symbol, event.comm, event.dso, event.flags, + event.ip, event.status, event.dse, event.dla, event.lat)) + +def trace_end(): + print "In trace_end:\n" + # We show the basic info for the 2 type of event classes + show_general_events() + show_pebs_ll() + con.close() + +# +# As the event number may be very big, so we can't use linear way +# to show the histogram in real number, but use a log2 algorithm. 
+# + +def num2sym(num): + # Each number will have at least one '#' + snum = '#' * (int)(math.log(num, 2) + 1) + return snum + +def show_general_events(): + + # Check the total record number in the table + count = con.execute("select count(*) from gen_events") + for t in count: + print "There is %d records in gen_events table" % t[0] + if t[0] == 0: + return + + print "Statistics about the general events grouped by thread/symbol/dso: \n" + + # Group by thread + commq = con.execute("select comm, count(comm) from gen_events group by comm order by -count(comm)") + print "\n%16s %8s %16s\n%s" % ("comm", "number", "histogram", "="*42) + for row in commq: + print "%16s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by symbol + print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histogram", "="*58) + symbolq = con.execute("select symbol, count(symbol) from gen_events group by symbol order by -count(symbol)") + for row in symbolq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by dso + print "\n%40s %8s %16s\n%s" % ("dso", "number", "histogram", "="*74) + dsoq = con.execute("select dso, count(dso) from gen_events group by dso order by -count(dso)") + for row in dsoq: + print "%40s %8d %s" % (row[0], row[1], num2sym(row[1])) + +# +# This function just shows the basic info, and we could do more with the +# data in the tables, like checking the function parameters when some +# big latency events happen. 
+# +def show_pebs_ll(): + + count = con.execute("select count(*) from pebs_ll") + for t in count: + print "There is %d records in pebs_ll table" % t[0] + if t[0] == 0: + return + + print "Statistics about the PEBS Load Latency events grouped by thread/symbol/dse/latency: \n" + + # Group by thread + commq = con.execute("select comm, count(comm) from pebs_ll group by comm order by -count(comm)") + print "\n%16s %8s %16s\n%s" % ("comm", "number", "histogram", "="*42) + for row in commq: + print "%16s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by symbol + print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histogram", "="*58) + symbolq = con.execute("select symbol, count(symbol) from pebs_ll group by symbol order by -count(symbol)") + for row in symbolq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by dse + dseq = con.execute("select dse, count(dse) from pebs_ll group by dse order by -count(dse)") + print "\n%32s %8s %16s\n%s" % ("dse", "number", "histogram", "="*58) + for row in dseq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by latency + latq = con.execute("select lat, count(lat) from pebs_ll group by lat order by lat") + print "\n%32s %8s %16s\n%s" % ("latency", "number", "histogram", "="*58) + for row in latq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + +def trace_unhandled(event_name, context, event_fields_dict): + print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 413bd62eedb1..81bd8c2af730 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -24,6 +24,7 @@ struct hist_browser { struct hist_entry *he_selection; struct map_symbol *selection; int print_seq; + bool show_dso; bool has_symbols; }; @@ -376,12 +377,19 @@ out: } static char *callchain_list__sym_name(struct callchain_list *cl, - char *bf, size_t bfsize) + char *bf, size_t bfsize, 
bool show_dso) { + int printed; + if (cl->ms.sym) - return cl->ms.sym->name; + printed = scnprintf(bf, bfsize, "%s", cl->ms.sym->name); + else + printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); + + if (show_dso) + scnprintf(bf + printed, bfsize - printed, " %s", + cl->ms.map ? cl->ms.map->dso->short_name : "unknown"); - snprintf(bf, bfsize, "%#" PRIx64, cl->ip); return bf; } @@ -417,7 +425,7 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *browse remaining -= cumul; list_for_each_entry(chain, &child->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str; + char bf[1024], *alloc_str; const char *str; int color; bool was_first = first; @@ -434,7 +442,8 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *browse } alloc_str = NULL; - str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + str = callchain_list__sym_name(chain, bf, sizeof(bf), + browser->show_dso); if (was_first) { double percent = cumul * 100.0 / new_total; @@ -493,7 +502,7 @@ static int hist_browser__show_callchain_node(struct hist_browser *browser, char folded_sign = ' '; list_for_each_entry(chain, &node->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *s; + char bf[1024], *s; int color; folded_sign = callchain_list__folded(chain); @@ -510,7 +519,8 @@ static int hist_browser__show_callchain_node(struct hist_browser *browser, *is_current_entry = true; } - s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + s = callchain_list__sym_name(chain, bf, sizeof(bf), + browser->show_dso); ui_browser__gotorc(&browser->b, row, 0); ui_browser__set_color(&browser->b, color); slsmg_write_nstring(" ", offset); @@ -576,7 +586,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (row_offset == 0) { - hist_entry__snprintf(entry, s, sizeof(s), browser->hists); + hist_entry__sort_snprintf(entry, s, sizeof(s), browser->hists); percent = (entry->period * 100.0) / browser->hists->stats.total_period; 
ui_browser__set_percent_color(&browser->b, percent, current_entry); @@ -830,7 +840,7 @@ static int hist_browser__fprintf_callchain_node_rb_tree(struct hist_browser *bro remaining -= cumul; list_for_each_entry(chain, &child->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *alloc_str; + char bf[1024], *alloc_str; const char *str; bool was_first = first; @@ -842,7 +852,8 @@ static int hist_browser__fprintf_callchain_node_rb_tree(struct hist_browser *bro folded_sign = callchain_list__folded(chain); alloc_str = NULL; - str = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + str = callchain_list__sym_name(chain, bf, sizeof(bf), + browser->show_dso); if (was_first) { double percent = cumul * 100.0 / new_total; @@ -880,10 +891,10 @@ static int hist_browser__fprintf_callchain_node(struct hist_browser *browser, int printed = 0; list_for_each_entry(chain, &node->val, list) { - char ipstr[BITS_PER_LONG / 4 + 1], *s; + char bf[1024], *s; folded_sign = callchain_list__folded(chain); - s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); + s = callchain_list__sym_name(chain, bf, sizeof(bf), browser->show_dso); printed += fprintf(fp, "%*s%c %s\n", offset, " ", folded_sign, s); } @@ -920,7 +931,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, if (symbol_conf.use_callchain) folded_sign = hist_entry__folded(he); - hist_entry__snprintf(he, s, sizeof(s), browser->hists); + hist_entry__sort_snprintf(he, s, sizeof(s), browser->hists); percent = (he->period * 100.0) / browser->hists->stats.total_period; if (symbol_conf.use_callchain) @@ -1133,6 +1144,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; case 'd': goto zoom_dso; + case 'V': + browser->show_dso = !browser->show_dso; + continue; case 't': goto zoom_thread; case '/': @@ -1164,6 +1178,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "d Zoom into current DSO\n" "t Zoom into current Thread\n" "P Print histograms to 
perf.hist.N\n" + "V Verbose (DSO names in callchains, etc)\n" "/ Filter symbol by name"); continue; case K_ENTER: diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c index ec12e0b4ded6..26b5b652a8cd 100644 --- a/tools/perf/ui/gtk/browser.c +++ b/tools/perf/ui/gtk/browser.c @@ -3,6 +3,7 @@ #include "../evsel.h" #include "../sort.h" #include "../hist.h" +#include "../helpline.h" #include "gtk.h" #include <signal.h> @@ -166,7 +167,7 @@ static GtkWidget *perf_gtk__setup_statusbar(void) } int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, - const char *help __used, + const char *help, void (*timer) (void *arg)__used, void *arg __used, int delay_secs __used) { @@ -233,6 +234,8 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER); + ui_helpline__push(help); + gtk_main(); perf_gtk__deactivate_context(&pgctx); diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h index a4d0f2b4a2dc..793cb6116ddf 100644 --- a/tools/perf/ui/gtk/gtk.h +++ b/tools/perf/ui/gtk/gtk.h @@ -29,6 +29,8 @@ static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx) struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window); int perf_gtk__deactivate_context(struct perf_gtk_context **ctx); +void perf_gtk__init_helpline(void); + #ifndef HAVE_GTK_INFO_BAR static inline GtkWidget *perf_gtk__setup_info_bar(void) { diff --git a/tools/perf/ui/gtk/helpline.c b/tools/perf/ui/gtk/helpline.c new file mode 100644 index 000000000000..5db4432ff12a --- /dev/null +++ b/tools/perf/ui/gtk/helpline.c @@ -0,0 +1,56 @@ +#include <stdio.h> +#include <string.h> + +#include "gtk.h" +#include "../ui.h" +#include "../helpline.h" +#include "../../util/debug.h" + +static void gtk_helpline_pop(void) +{ + if (!perf_gtk__is_active_context(pgctx)) + return; + + gtk_statusbar_pop(GTK_STATUSBAR(pgctx->statbar), + pgctx->statbar_ctx_id); +} + +static void gtk_helpline_push(const char *msg) +{ + 
 if (!perf_gtk__is_active_context(pgctx)) + return; + + gtk_statusbar_push(GTK_STATUSBAR(pgctx->statbar), + pgctx->statbar_ctx_id, msg); +} + +static struct ui_helpline gtk_helpline_fns = { + .pop = gtk_helpline_pop, + .push = gtk_helpline_push, +}; + +void perf_gtk__init_helpline(void) +{ + helpline_fns = &gtk_helpline_fns; +} + +int perf_gtk__show_helpline(const char *fmt, va_list ap) +{ + int ret; + char *ptr; + static int backlog; + + ret = vscnprintf(ui_helpline__current + backlog, + sizeof(ui_helpline__current) - backlog, fmt, ap); + backlog += ret; + + /* only first line can be displayed */ + ptr = strchr(ui_helpline__current, '\n'); + if (ptr && (ptr - ui_helpline__current) <= backlog) { + *ptr = '\0'; + ui_helpline__puts(ui_helpline__current); + backlog = 0; + } + + return ret; +} diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c index 92879ce61e2f..ec1ee26b485a 100644 --- a/tools/perf/ui/gtk/setup.c +++ b/tools/perf/ui/gtk/setup.c @@ -7,11 +7,14 @@ extern struct perf_error_ops perf_gtk_eops; int perf_gtk__init(void) { perf_error__register(&perf_gtk_eops); + perf_gtk__init_helpline(); return gtk_init_check(NULL, NULL) ? 0 : -1; } void perf_gtk__exit(bool wait_for_ok __used) { + if (!perf_gtk__is_active_context(pgctx)) + return; perf_error__unregister(&perf_gtk_eops); gtk_main_quit(); } diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c index 0ead373c0dfb..b8efb966f94c 100644 --- a/tools/perf/ui/gtk/util.c +++ b/tools/perf/ui/gtk/util.c @@ -117,11 +117,6 @@ struct perf_error_ops perf_gtk_eops = { * For now, just add stubs for NO_NEWT=1 build. 
*/ #ifdef NO_NEWT_SUPPORT -int ui_helpline__show_help(const char *format __used, va_list ap __used) -{ - return 0; -} - void ui_progress__update(u64 curr __used, u64 total __used, const char *title __used) { diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c index 2f950c2641c8..78ba28ac7a2c 100644 --- a/tools/perf/ui/helpline.c +++ b/tools/perf/ui/helpline.c @@ -5,23 +5,32 @@ #include "../debug.h" #include "helpline.h" #include "ui.h" -#include "libslang.h" -void ui_helpline__pop(void) +char ui_helpline__current[512]; + +static void nop_helpline__pop(void) { } -char ui_helpline__current[512]; +static void nop_helpline__push(const char *msg __used) +{ +} -void ui_helpline__push(const char *msg) +static struct ui_helpline default_helpline_fns = { + .pop = nop_helpline__pop, + .push = nop_helpline__push, +}; + +struct ui_helpline *helpline_fns = &default_helpline_fns; + +void ui_helpline__pop(void) { - const size_t sz = sizeof(ui_helpline__current); + helpline_fns->pop(); +} - SLsmg_gotorc(SLtt_Screen_Rows - 1, 0); - SLsmg_set_color(0); - SLsmg_write_nstring((char *)msg, SLtt_Screen_Cols); - SLsmg_refresh(); - strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0'; +void ui_helpline__push(const char *msg) +{ + helpline_fns->push(msg); } void ui_helpline__vpush(const char *fmt, va_list ap) @@ -50,30 +59,3 @@ void ui_helpline__puts(const char *msg) ui_helpline__pop(); ui_helpline__push(msg); } - -void ui_helpline__init(void) -{ - ui_helpline__puts(" "); -} - -char ui_helpline__last_msg[1024]; - -int ui_helpline__show_help(const char *format, va_list ap) -{ - int ret; - static int backlog; - - pthread_mutex_lock(&ui__lock); - ret = vscnprintf(ui_helpline__last_msg + backlog, - sizeof(ui_helpline__last_msg) - backlog, format, ap); - backlog += ret; - - if (ui_helpline__last_msg[backlog - 1] == '\n') { - ui_helpline__puts(ui_helpline__last_msg); - SLsmg_refresh(); - backlog = 0; - } - pthread_mutex_unlock(&ui__lock); - - return ret; -} diff --git 
a/tools/perf/ui/helpline.h b/tools/perf/ui/helpline.h index 7bab6b34e35e..a2487f93aa48 100644 --- a/tools/perf/ui/helpline.h +++ b/tools/perf/ui/helpline.h @@ -4,13 +4,44 @@ #include <stdio.h> #include <stdarg.h> +#include "../util/cache.h" + +struct ui_helpline { + void (*pop)(void); + void (*push)(const char *msg); +}; + +extern struct ui_helpline *helpline_fns; + void ui_helpline__init(void); + void ui_helpline__pop(void); void ui_helpline__push(const char *msg); void ui_helpline__vpush(const char *fmt, va_list ap); void ui_helpline__fpush(const char *fmt, ...); void ui_helpline__puts(const char *msg); -extern char ui_helpline__current[]; +extern char ui_helpline__current[512]; + +#ifdef NO_NEWT_SUPPORT +static inline int ui_helpline__show_help(const char *format __used, + va_list ap __used) +{ + return 0; +} +#else +extern char ui_helpline__last_msg[]; +int ui_helpline__show_help(const char *format, va_list ap); +#endif /* NO_NEWT_SUPPORT */ + +#ifdef NO_GTK2_SUPPORT +static inline int perf_gtk__show_helpline(const char *format __used, + va_list ap __used) +{ + return 0; +} +#else +int perf_gtk__show_helpline(const char *format, va_list ap); +#endif /* NO_GTK2_SUPPORT */ #endif /* _PERF_UI_HELPLINE_H_ */ diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c index 791fb15ce350..c7820e569660 100644 --- a/tools/perf/ui/setup.c +++ b/tools/perf/ui/setup.c @@ -1,7 +1,11 @@ +#include <pthread.h> + #include "../cache.h" #include "../debug.h" +pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; + void setup_browser(bool fallback_to_pager) { if (!isatty(1) || dump_trace) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c new file mode 100644 index 000000000000..9bf7e9e5a72e --- /dev/null +++ b/tools/perf/ui/stdio/hist.c @@ -0,0 +1,653 @@ +#include <stdio.h> +#include <math.h> + +#include "../../util/util.h" +#include "../../util/hist.h" +#include "../../util/sort.h" + + +static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) +{ 
+ int i; + int ret = fprintf(fp, " "); + + for (i = 0; i < left_margin; i++) + ret += fprintf(fp, " "); + + return ret; +} + +static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, + int left_margin) +{ + int i; + size_t ret = callchain__fprintf_left_margin(fp, left_margin); + + for (i = 0; i < depth; i++) + if (depth_mask & (1 << i)) + ret += fprintf(fp, "| "); + else + ret += fprintf(fp, " "); + + ret += fprintf(fp, "\n"); + + return ret; +} + +static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, + int depth, int depth_mask, int period, + u64 total_samples, u64 hits, + int left_margin) +{ + int i; + size_t ret = 0; + + ret += callchain__fprintf_left_margin(fp, left_margin); + for (i = 0; i < depth; i++) { + if (depth_mask & (1 << i)) + ret += fprintf(fp, "|"); + else + ret += fprintf(fp, " "); + if (!period && i == depth - 1) { + double percent; + + percent = hits * 100.0 / total_samples; + ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent); + } else + ret += fprintf(fp, "%s", " "); + } + if (chain->ms.sym) + ret += fprintf(fp, "%s\n", chain->ms.sym->name); + else + ret += fprintf(fp, "0x%0" PRIx64 "\n", chain->ip); + + return ret; +} + +static struct symbol *rem_sq_bracket; +static struct callchain_list rem_hits; + +static void init_rem_hits(void) +{ + rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6); + if (!rem_sq_bracket) { + fprintf(stderr, "Not enough memory to display remaining hits\n"); + return; + } + + strcpy(rem_sq_bracket->name, "[...]"); + rem_hits.ms.sym = rem_sq_bracket; +} + +static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, + u64 total_samples, int depth, + int depth_mask, int left_margin) +{ + struct rb_node *node, *next; + struct callchain_node *child; + struct callchain_list *chain; + int new_depth_mask = depth_mask; + u64 remaining; + size_t ret = 0; + int i; + uint entries_printed = 0; + + remaining = total_samples; + + node = rb_first(root); + while (node) { + 
u64 new_total; + u64 cumul; + + child = rb_entry(node, struct callchain_node, rb_node); + cumul = callchain_cumul_hits(child); + remaining -= cumul; + + /* + * The depth mask manages the output of pipes that show + * the depth. We don't want to keep the pipes of the current + * level for the last child of this depth. + * Except if we have remaining filtered hits. They will + * supersede the last child + */ + next = rb_next(node); + if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining)) + new_depth_mask &= ~(1 << (depth - 1)); + + /* + * But we keep the older depth mask for the line separator + * to keep the level link until we reach the last child + */ + ret += ipchain__fprintf_graph_line(fp, depth, depth_mask, + left_margin); + i = 0; + list_for_each_entry(chain, &child->val, list) { + ret += ipchain__fprintf_graph(fp, chain, depth, + new_depth_mask, i++, + total_samples, + cumul, + left_margin); + } + + if (callchain_param.mode == CHAIN_GRAPH_REL) + new_total = child->children_hit; + else + new_total = total_samples; + + ret += __callchain__fprintf_graph(fp, &child->rb_root, new_total, + depth + 1, + new_depth_mask | (1 << depth), + left_margin); + node = next; + if (++entries_printed == callchain_param.print_limit) + break; + } + + if (callchain_param.mode == CHAIN_GRAPH_REL && + remaining && remaining != total_samples) { + + if (!rem_sq_bracket) + return ret; + + new_depth_mask &= ~(1 << (depth - 1)); + ret += ipchain__fprintf_graph(fp, &rem_hits, depth, + new_depth_mask, 0, total_samples, + remaining, left_margin); + } + + return ret; +} + +static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, + u64 total_samples, int left_margin) +{ + struct callchain_node *cnode; + struct callchain_list *chain; + u32 entries_printed = 0; + bool printed = false; + struct rb_node *node; + int i = 0; + int ret = 0; + + /* + * If have one single callchain root, don't bother printing + * its percentage (100 % in fractal mode and the same percentage 
+ * than the hist in graph mode). This also avoid one level of column. + */ + node = rb_first(root); + if (node && !rb_next(node)) { + cnode = rb_entry(node, struct callchain_node, rb_node); + list_for_each_entry(chain, &cnode->val, list) { + /* + * If we sort by symbol, the first entry is the same than + * the symbol. No need to print it otherwise it appears as + * displayed twice. + */ + if (!i++ && sort__first_dimension == SORT_SYM) + continue; + if (!printed) { + ret += callchain__fprintf_left_margin(fp, left_margin); + ret += fprintf(fp, "|\n"); + ret += callchain__fprintf_left_margin(fp, left_margin); + ret += fprintf(fp, "---"); + left_margin += 3; + printed = true; + } else + ret += callchain__fprintf_left_margin(fp, left_margin); + + if (chain->ms.sym) + ret += fprintf(fp, " %s\n", chain->ms.sym->name); + else + ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); + + if (++entries_printed == callchain_param.print_limit) + break; + } + root = &cnode->rb_root; + } + + ret += __callchain__fprintf_graph(fp, root, total_samples, + 1, 1, left_margin); + ret += fprintf(fp, "\n"); + + return ret; +} + +static size_t __callchain__fprintf_flat(FILE *fp, + struct callchain_node *self, + u64 total_samples) +{ + struct callchain_list *chain; + size_t ret = 0; + + if (!self) + return 0; + + ret += __callchain__fprintf_flat(fp, self->parent, total_samples); + + + list_for_each_entry(chain, &self->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + if (chain->ms.sym) + ret += fprintf(fp, " %s\n", chain->ms.sym->name); + else + ret += fprintf(fp, " %p\n", + (void *)(long)chain->ip); + } + + return ret; +} + +static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *self, + u64 total_samples) +{ + size_t ret = 0; + u32 entries_printed = 0; + struct rb_node *rb_node; + struct callchain_node *chain; + + rb_node = rb_first(self); + while (rb_node) { + double percent; + + chain = rb_entry(rb_node, struct callchain_node, rb_node); + percent = chain->hit * 
100.0 / total_samples; + + ret += percent_color_fprintf(fp, " %6.2f%%\n", percent); /* accumulate: ret is the total over every chain, not just the last one */ + ret += __callchain__fprintf_flat(fp, chain, total_samples); + ret += fprintf(fp, "\n"); + if (++entries_printed == callchain_param.print_limit) + break; + + rb_node = rb_next(rb_node); + } + + return ret; +} + +static size_t hist_entry_callchain__fprintf(struct hist_entry *he, + u64 total_samples, int left_margin, + FILE *fp) +{ + switch (callchain_param.mode) { + case CHAIN_GRAPH_REL: + return callchain__fprintf_graph(fp, &he->sorted_chain, he->period, + left_margin); + break; + case CHAIN_GRAPH_ABS: + return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, + left_margin); + break; + case CHAIN_FLAT: + return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); + break; + case CHAIN_NONE: + break; + default: + pr_err("Bad callchain mode\n"); + } + + return 0; +} + +static int hist_entry__period_snprintf(struct hist_entry *he, char *s, + size_t size, struct hists *pair_hists, + bool show_displacement, long displacement, + bool color, u64 total_period) +{ + u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; + u64 nr_events; + const char *sep = symbol_conf.field_sep; + int ret; + + if (symbol_conf.exclude_other && !he->parent) + return 0; + + if (pair_hists) { + period = he->pair ? he->pair->period : 0; + nr_events = he->pair ? he->pair->nr_events : 0; + total = pair_hists->stats.total_period; + period_sys = he->pair ? he->pair->period_sys : 0; + period_us = he->pair ? he->pair->period_us : 0; + period_guest_sys = he->pair ? he->pair->period_guest_sys : 0; + period_guest_us = he->pair ? he->pair->period_guest_us : 0; + } else { + period = he->period; + nr_events = he->nr_events; + total = total_period; + period_sys = he->period_sys; + period_us = he->period_us; + period_guest_sys = he->period_guest_sys; + period_guest_us = he->period_guest_us; + } + + if (total) { + if (color) + ret = percent_color_snprintf(s, size, + sep ? 
"%.2f" : " %6.2f%%", + (period * 100.0) / total); + else + ret = scnprintf(s, size, sep ? "%.2f" : " %6.2f%%", + (period * 100.0) / total); + if (symbol_conf.show_cpu_utilization) { + ret += percent_color_snprintf(s + ret, size - ret, + sep ? "%.2f" : " %6.2f%%", + (period_sys * 100.0) / total); + ret += percent_color_snprintf(s + ret, size - ret, + sep ? "%.2f" : " %6.2f%%", + (period_us * 100.0) / total); + if (perf_guest) { + ret += percent_color_snprintf(s + ret, + size - ret, + sep ? "%.2f" : " %6.2f%%", + (period_guest_sys * 100.0) / + total); + ret += percent_color_snprintf(s + ret, + size - ret, + sep ? "%.2f" : " %6.2f%%", + (period_guest_us * 100.0) / + total); + } + } + } else + ret = scnprintf(s, size, sep ? "%" PRIu64 : "%12" PRIu64 " ", period); + + if (symbol_conf.show_nr_samples) { + if (sep) + ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events); + else + ret += scnprintf(s + ret, size - ret, "%11" PRIu64, nr_events); + } + + if (symbol_conf.show_total_period) { + if (sep) + ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period); + else + ret += scnprintf(s + ret, size - ret, " %12" PRIu64, period); + } + + if (pair_hists) { + char bf[32]; + double old_percent = 0, new_percent = 0, diff; + + if (total > 0) + old_percent = (period * 100.0) / total; + if (total_period > 0) + new_percent = (he->period * 100.0) / total_period; + + diff = new_percent - old_percent; + + if (fabs(diff) >= 0.01) + scnprintf(bf, sizeof(bf), "%+4.2F%%", diff); + else + scnprintf(bf, sizeof(bf), " "); + + if (sep) + ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); + else + ret += scnprintf(s + ret, size - ret, "%11.11s", bf); + + if (show_displacement) { + if (displacement) + scnprintf(bf, sizeof(bf), "%+4ld", displacement); + else + scnprintf(bf, sizeof(bf), " "); + + if (sep) + ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); + else + ret += scnprintf(s + ret, size - ret, "%6.6s", bf); + } + } + + return ret; +} + +int 
hist_entry__sort_snprintf(struct hist_entry *he, char *s, size_t size, + struct hists *hists) +{ + const char *sep = symbol_conf.field_sep; + struct sort_entry *se; + int ret = 0; + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + + ret += scnprintf(s + ret, size - ret, "%s", sep ?: " "); + ret += se->se_snprintf(he, s + ret, size - ret, + hists__col_len(hists, se->se_width_idx)); + } + + return ret; +} + +static size_t hist_entry__callchain_fprintf(struct hist_entry *he, + struct hists *hists, + u64 total_period, FILE *fp) +{ + int left_margin = 0; + + if (sort__first_dimension == SORT_COMM) { + struct sort_entry *se = list_first_entry(&hist_entry__sort_list, + typeof(*se), list); + left_margin = hists__col_len(hists, se->se_width_idx); + left_margin -= thread__comm_len(he->thread); + } + + return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); +} + +static int hist_entry__fprintf(struct hist_entry *he, size_t size, + struct hists *hists, struct hists *pair_hists, + bool show_displacement, long displacement, + u64 total_period, FILE *fp) +{ + char bf[512]; + int ret; + + if (size == 0 || size > sizeof(bf)) + size = sizeof(bf); + + ret = hist_entry__period_snprintf(he, bf, size, pair_hists, + show_displacement, displacement, + true, total_period); + hist_entry__sort_snprintf(he, bf + ret, size - ret, hists); + + ret = fprintf(fp, "%s\n", bf); + + if (symbol_conf.use_callchain) + ret += hist_entry__callchain_fprintf(he, hists, + total_period, fp); + + return ret; +} + +size_t hists__fprintf(struct hists *hists, struct hists *pair, + bool show_displacement, bool show_header, int max_rows, + int max_cols, FILE *fp) +{ + struct sort_entry *se; + struct rb_node *nd; + size_t ret = 0; + u64 total_period; + unsigned long position = 1; + long displacement = 0; + unsigned int width; + const char *sep = symbol_conf.field_sep; + const char *col_width = symbol_conf.col_width_list_str; + int nr_rows = 0; + + 
init_rem_hits(); + + if (!show_header) + goto print_entries; + + fprintf(fp, "# %s", pair ? "Baseline" : "Overhead"); + + if (symbol_conf.show_cpu_utilization) { + if (sep) { + ret += fprintf(fp, "%csys", *sep); + ret += fprintf(fp, "%cus", *sep); + if (perf_guest) { + ret += fprintf(fp, "%cguest sys", *sep); + ret += fprintf(fp, "%cguest us", *sep); + } + } else { + ret += fprintf(fp, " sys "); + ret += fprintf(fp, " us "); + if (perf_guest) { + ret += fprintf(fp, " guest sys "); + ret += fprintf(fp, " guest us "); + } + } + } + + if (symbol_conf.show_nr_samples) { + if (sep) + fprintf(fp, "%cSamples", *sep); + else + fputs(" Samples ", fp); + } + + if (symbol_conf.show_total_period) { + if (sep) + ret += fprintf(fp, "%cPeriod", *sep); + else + ret += fprintf(fp, " Period "); + } + + if (pair) { + if (sep) + ret += fprintf(fp, "%cDelta", *sep); + else + ret += fprintf(fp, " Delta "); + + if (show_displacement) { + if (sep) + ret += fprintf(fp, "%cDisplacement", *sep); + else + ret += fprintf(fp, " Displ"); + } + } + + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (se->elide) + continue; + if (sep) { + fprintf(fp, "%c%s", *sep, se->se_header); + continue; + } + width = strlen(se->se_header); + if (symbol_conf.col_width_list_str) { + if (col_width) { + hists__set_col_len(hists, se->se_width_idx, + atoi(col_width)); + col_width = strchr(col_width, ','); + if (col_width) + ++col_width; + } + } + if (!hists__new_col_len(hists, se->se_width_idx, width)) + width = hists__col_len(hists, se->se_width_idx); + fprintf(fp, " %*s", width, se->se_header); + } + + fprintf(fp, "\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; + + if (sep) + goto print_entries; + + fprintf(fp, "# ........"); + if (symbol_conf.show_cpu_utilization) + fprintf(fp, " ....... 
......."); + if (symbol_conf.show_nr_samples) + fprintf(fp, " .........."); + if (symbol_conf.show_total_period) + fprintf(fp, " ............"); + if (pair) { + fprintf(fp, " .........."); + if (show_displacement) + fprintf(fp, " ....."); + } + list_for_each_entry(se, &hist_entry__sort_list, list) { + unsigned int i; + + if (se->elide) + continue; + + fprintf(fp, " "); + width = hists__col_len(hists, se->se_width_idx); + if (width == 0) + width = strlen(se->se_header); + for (i = 0; i < width; i++) + fprintf(fp, "."); + } + + fprintf(fp, "\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; + + fprintf(fp, "#\n"); + if (max_rows && ++nr_rows >= max_rows) + goto out; + +print_entries: + total_period = hists->stats.total_period; + + for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + if (h->filtered) + continue; + + if (show_displacement) { + if (h->pair != NULL) + displacement = ((long)h->pair->position - + (long)position); + else + displacement = 0; + ++position; + } + ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, + displacement, total_period, fp); + + if (max_rows && ++nr_rows >= max_rows) + goto out; + + if (h->ms.map == NULL && verbose > 1) { + __map_groups__fprintf_maps(&h->thread->mg, + MAP__FUNCTION, verbose, fp); + fprintf(fp, "%.10s end\n", graph_dotted_line); + } + } +out: + free(rem_sq_bracket); + + return ret; +} + +size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) +{ + int i; + size_t ret = 0; + + for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { + const char *name; + + if (hists->stats.nr_events[i] == 0) + continue; + + name = perf_event__name(i); + if (!strcmp(name, "UNKNOWN")) + continue; + + ret += fprintf(fp, "%16s events: %10d\n", name, + hists->stats.nr_events[i]); + } + + return ret; +} diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c new file mode 100644 index 000000000000..2884d2f41e33 --- /dev/null 
+++ b/tools/perf/ui/tui/helpline.c @@ -0,0 +1,57 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <pthread.h> + +#include "../../util/debug.h" +#include "../helpline.h" +#include "../ui.h" +#include "../libslang.h" + +static void tui_helpline__pop(void) +{ +} + +static void tui_helpline__push(const char *msg) +{ + const size_t sz = sizeof(ui_helpline__current); + + SLsmg_gotorc(SLtt_Screen_Rows - 1, 0); + SLsmg_set_color(0); + SLsmg_write_nstring((char *)msg, SLtt_Screen_Cols); + SLsmg_refresh(); + strncpy(ui_helpline__current, msg, sz)[sz - 1] = '\0'; +} + +struct ui_helpline tui_helpline_fns = { + .pop = tui_helpline__pop, + .push = tui_helpline__push, +}; + +void ui_helpline__init(void) +{ + helpline_fns = &tui_helpline_fns; + ui_helpline__puts(" "); +} + +char ui_helpline__last_msg[1024]; + +int ui_helpline__show_help(const char *format, va_list ap) +{ + int ret; + static int backlog; + + pthread_mutex_lock(&ui__lock); + ret = vscnprintf(ui_helpline__last_msg + backlog, + sizeof(ui_helpline__last_msg) - backlog, format, ap); + backlog += ret; + + if (backlog > 0 && ui_helpline__last_msg[backlog - 1] == '\n') { /* backlog is 0 when nothing is buffered yet; do not read index -1 */ + ui_helpline__puts(ui_helpline__last_msg); + SLsmg_refresh(); + backlog = 0; + } + pthread_mutex_unlock(&ui__lock); + + return ret; +} diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index e813c1d17346..4c936e09931c 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -11,8 +11,6 @@ #include "../libslang.h" #include "../keysyms.h" -pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER; - static volatile int ui__need_resize; extern struct perf_error_ops perf_tui_eops; diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 4dfe0bb3c322..66eb3828ceb5 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -23,8 +23,10 @@ int eprintf(int level, const char *fmt, ...) 
if (verbose >= level) { va_start(args, fmt); - if (use_browser > 0) + if (use_browser == 1) ret = ui_helpline__show_help(fmt, args); + else if (use_browser == 2) + ret = perf_gtk__show_helpline(fmt, args); else ret = vfprintf(stderr, fmt, args); va_end(args); diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 015c91dbc096..05e660cbf7e2 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -4,6 +4,7 @@ #include <stdbool.h> #include "event.h" +#include "../ui/helpline.h" extern int verbose; extern bool quiet, dump_trace; @@ -15,11 +16,6 @@ struct ui_progress; struct perf_error_ops; #if defined(NO_NEWT_SUPPORT) && defined(NO_GTK2_SUPPORT) -static inline int ui_helpline__show_help(const char *format __used, va_list ap __used) -{ - return 0; -} - static inline void ui_progress__update(u64 curr __used, u64 total __used, const char *title __used) {} @@ -39,8 +35,6 @@ perf_error__unregister(struct perf_error_ops *eops __used) #else /* NO_NEWT_SUPPORT && NO_GTK2_SUPPORT */ -extern char ui_helpline__last_msg[]; -int ui_helpline__show_help(const char *format, va_list ap); #include "../ui/progress.h" int ui__error(const char *format, ...) 
__attribute__((format(printf, 1, 2))); #include "../ui/util.h" diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 2a6f33cd888c..3a0f1a5da91c 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -412,7 +412,7 @@ struct process_symbol_args { }; static int find_symbol_cb(void *arg, const char *name, char type, - u64 start, u64 end __used) + u64 start) { struct process_symbol_args *args = arg; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index d84870b06426..0e088d046e56 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -69,6 +69,16 @@ struct sample_event { u64 array[]; }; +struct regs_dump { + u64 *regs; +}; + +struct stack_dump { + u16 offset; + u64 size; + char *data; +}; + struct perf_sample { u64 ip; u32 pid, tid; @@ -82,6 +92,8 @@ struct perf_sample { void *raw_data; struct ip_callchain *callchain; struct branch_stack *branch_stack; + struct regs_dump user_regs; + struct stack_dump user_stack; }; #define BUILD_ID_SIZE 20 diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 9b38681add9e..4774ac1e3d5f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -57,7 +57,7 @@ void perf_evlist__config_attrs(struct perf_evlist *evlist, if (evlist->cpus->map[0] < 0) opts->no_inherit = true; - first = list_entry(evlist->entries.next, struct perf_evsel, node); + first = perf_evlist__first(evlist); list_for_each_entry(evsel, &evlist->entries, node) { perf_evsel__config(evsel, opts, first); @@ -108,6 +108,25 @@ void perf_evlist__splice_list_tail(struct perf_evlist *evlist, evlist->nr_entries += nr_entries; } +void __perf_evlist__set_leader(struct list_head *list) +{ + struct perf_evsel *evsel, *leader; + + leader = list_entry(list->next, struct perf_evsel, node); + leader->leader = NULL; + + list_for_each_entry(evsel, list, node) { + if (evsel != leader) + evsel->leader = leader; + } +} + +void perf_evlist__set_leader(struct perf_evlist *evlist) +{ + if 
(evlist->nr_entries) + __perf_evlist__set_leader(&evlist->entries); +} + int perf_evlist__add_default(struct perf_evlist *evlist) { struct perf_event_attr attr = { @@ -357,7 +376,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) int hash; if (evlist->nr_entries == 1) - return list_entry(evlist->entries.next, struct perf_evsel, node); + return perf_evlist__first(evlist); hash = hash_64(id, PERF_EVLIST__HLIST_BITS); head = &evlist->heads[hash]; @@ -367,7 +386,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) return sid->evsel; if (!perf_evlist__sample_id_all(evlist)) - return list_entry(evlist->entries.next, struct perf_evsel, node); + return perf_evlist__first(evlist); return NULL; } @@ -675,11 +694,9 @@ int perf_evlist__set_filters(struct perf_evlist *evlist) return 0; } -bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist) +bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) { - struct perf_evsel *pos, *first; - - pos = first = list_entry(evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; list_for_each_entry_continue(pos, &evlist->entries, node) { if (first->attr.sample_type != pos->attr.sample_type) @@ -689,23 +706,19 @@ bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist) return true; } -u64 perf_evlist__sample_type(const struct perf_evlist *evlist) +u64 perf_evlist__sample_type(struct perf_evlist *evlist) { - struct perf_evsel *first; - - first = list_entry(evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist); return first->attr.sample_type; } -u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist) +u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist) { - struct perf_evsel *first; + struct perf_evsel *first = perf_evlist__first(evlist); struct perf_sample *data; u64 sample_type; u16 size = 0; - first = 
list_entry(evlist->entries.next, struct perf_evsel, node); - if (!first->attr.sample_id_all) goto out; @@ -729,11 +742,9 @@ out: return size; } -bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist) +bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist) { - struct perf_evsel *pos, *first; - - pos = first = list_entry(evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; list_for_each_entry_continue(pos, &evlist->entries, node) { if (first->attr.sample_id_all != pos->attr.sample_id_all) @@ -743,11 +754,9 @@ bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist) return true; } -bool perf_evlist__sample_id_all(const struct perf_evlist *evlist) +bool perf_evlist__sample_id_all(struct perf_evlist *evlist) { - struct perf_evsel *first; - - first = list_entry(evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(evlist); return first->attr.sample_id_all; } @@ -757,21 +766,13 @@ void perf_evlist__set_selected(struct perf_evlist *evlist, evlist->selected = evsel; } -int perf_evlist__open(struct perf_evlist *evlist, bool group) +int perf_evlist__open(struct perf_evlist *evlist) { - struct perf_evsel *evsel, *first; + struct perf_evsel *evsel; int err, ncpus, nthreads; - first = list_entry(evlist->entries.next, struct perf_evsel, node); - list_for_each_entry(evsel, &evlist->entries, node) { - struct xyarray *group_fd = NULL; - - if (group && evsel != first) - group_fd = first->fd; - - err = perf_evsel__open(evsel, evlist->cpus, evlist->threads, - group, group_fd); + err = perf_evsel__open(evsel, evlist->cpus, evlist->threads); if (err < 0) goto out_err; } @@ -885,6 +886,6 @@ int perf_evlist__start_workload(struct perf_evlist *evlist) int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample, bool swapped) { - struct perf_evsel *e = list_entry(evlist->entries.next, struct 
perf_evsel, node); - return perf_evsel__parse_sample(e, event, sample, swapped); + struct perf_evsel *evsel = perf_evlist__first(evlist); + return perf_evsel__parse_sample(evsel, event, sample, swapped); } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 528c1acd9298..2ed255792c6b 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -5,6 +5,7 @@ #include <stdio.h> #include "../perf.h" #include "event.h" +#include "evsel.h" #include "util.h" #include <unistd.h> @@ -41,8 +42,6 @@ struct perf_evsel_str_handler { void *handler; }; -struct perf_evsel; - struct perf_evlist *perf_evlist__new(struct cpu_map *cpus, struct thread_map *threads); void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, @@ -85,7 +84,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id); union perf_event *perf_evlist__mmap_read(struct perf_evlist *self, int idx); -int perf_evlist__open(struct perf_evlist *evlist, bool group); +int perf_evlist__open(struct perf_evlist *evlist); void perf_evlist__config_attrs(struct perf_evlist *evlist, struct perf_record_opts *opts); @@ -118,18 +117,30 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, void perf_evlist__delete_maps(struct perf_evlist *evlist); int perf_evlist__set_filters(struct perf_evlist *evlist); -u64 perf_evlist__sample_type(const struct perf_evlist *evlist); -bool perf_evlist__sample_id_all(const const struct perf_evlist *evlist); -u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist); +void __perf_evlist__set_leader(struct list_head *list); +void perf_evlist__set_leader(struct perf_evlist *evlist); + +u64 perf_evlist__sample_type(struct perf_evlist *evlist); +bool perf_evlist__sample_id_all(struct perf_evlist *evlist); +u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample, bool swapped); -bool 
perf_evlist__valid_sample_type(const struct perf_evlist *evlist); -bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist); +bool perf_evlist__valid_sample_type(struct perf_evlist *evlist); +bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); void perf_evlist__splice_list_tail(struct perf_evlist *evlist, struct list_head *list, int nr_entries); +static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) +{ + return list_entry(evlist->entries.next, struct perf_evsel, node); +} + +static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) +{ + return list_entry(evlist->entries.prev, struct perf_evsel, node); +} #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2eaae140def2..7ff3c8fb736c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -8,6 +8,7 @@ */ #include <byteswap.h> +#include <linux/bitops.h> #include "asm/bug.h" #include "evsel.h" #include "evlist.h" @@ -16,9 +17,10 @@ #include "thread_map.h" #include "target.h" #include "../../../include/linux/hw_breakpoint.h" +#include "../../include/linux/perf_event.h" +#include "perf_regs.h" #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) -#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0)) static int __perf_evsel__sample_size(u64 sample_type) { @@ -317,7 +319,8 @@ const char *perf_evsel__name(struct perf_evsel *evsel) break; default: - scnprintf(bf, sizeof(bf), "%s", "unknown attr type"); + scnprintf(bf, sizeof(bf), "unknown attr type: %d", + evsel->attr.type); break; } @@ -367,9 +370,18 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts, attr->mmap_data = track; } - if (opts->call_graph) + if (opts->call_graph) { attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + if (opts->call_graph == CALLCHAIN_DWARF) { + attr->sample_type |= PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_STACK_USER; + attr->sample_regs_user = PERF_REGS_MASK; + 
attr->sample_stack_user = opts->stack_dump_size; + attr->exclude_callchain_user = 1; + } + } + if (perf_target__has_cpu(&opts->target)) attr->sample_type |= PERF_SAMPLE_CPU; @@ -481,6 +493,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) { perf_evsel__exit(evsel); close_cgroup(evsel->cgrp); + free(evsel->group_name); free(evsel->name); free(evsel); } @@ -556,9 +569,28 @@ int __perf_evsel__read(struct perf_evsel *evsel, return 0; } +static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) +{ + struct perf_evsel *leader = evsel->leader; + int fd; + + if (!leader) + return -1; + + /* + * Leader must be already processed/open, + * if not it's a bug. + */ + BUG_ON(!leader->fd); + + fd = FD(leader, cpu, thread); + BUG_ON(fd == -1); + + return fd; +} + static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group, - struct xyarray *group_fds) + struct thread_map *threads) { int cpu, thread; unsigned long flags = 0; @@ -574,13 +606,15 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, } for (cpu = 0; cpu < cpus->nr; cpu++) { - int group_fd = group_fds ? 
GROUP_FD(group_fds, cpu) : -1; for (thread = 0; thread < threads->nr; thread++) { + int group_fd; if (!evsel->cgrp) pid = threads->map[thread]; + group_fd = get_group_fd(evsel, cpu, thread); + FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], @@ -589,9 +623,6 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, err = -errno; goto out_close; } - - if (group && group_fd == -1) - group_fd = FD(evsel, cpu, thread); } } @@ -635,8 +666,7 @@ static struct { }; int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group, - struct xyarray *group_fd) + struct thread_map *threads) { if (cpus == NULL) { /* Work around old compiler warnings about strict aliasing */ @@ -646,23 +676,19 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, if (threads == NULL) threads = &empty_thread_map.map; - return __perf_evsel__open(evsel, cpus, threads, group, group_fd); + return __perf_evsel__open(evsel, cpus, threads); } int perf_evsel__open_per_cpu(struct perf_evsel *evsel, - struct cpu_map *cpus, bool group, - struct xyarray *group_fd) + struct cpu_map *cpus) { - return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group, - group_fd); + return __perf_evsel__open(evsel, cpus, &empty_thread_map.map); } int perf_evsel__open_per_thread(struct perf_evsel *evsel, - struct thread_map *threads, bool group, - struct xyarray *group_fd) + struct thread_map *threads) { - return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group, - group_fd); + return __perf_evsel__open(evsel, &empty_cpu_map.map, threads); } static int perf_event__parse_id_sample(const union perf_event *event, u64 type, @@ -733,6 +759,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *data, bool swapped) { u64 type = evsel->attr.sample_type; + u64 regs_user = evsel->attr.sample_regs_user; const u64 *array; /* @@ -869,6 +896,32 @@ int 
perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, sz /= sizeof(u64); array += sz; } + + if (type & PERF_SAMPLE_REGS_USER) { + /* First u64 tells us if we have any regs in sample. */ + u64 avail = *array++; + + if (avail) { + data->user_regs.regs = (u64 *)array; + array += hweight_long(regs_user); + } + } + + if (type & PERF_SAMPLE_STACK_USER) { + u64 size = *array++; + + data->user_stack.offset = ((char *)(array - 1) + - (char *) event); + + if (!size) { + data->user_stack.size = 0; + } else { + data->user_stack.data = (char *)array; + array += size / sizeof(*array); + data->user_stack.size = *array; + } + } + return 0; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b559929983bb..94f6ba16747f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -53,9 +53,10 @@ struct perf_evsel { u64 *id; struct perf_counts *counts; int idx; - int ids; + u32 ids; struct hists hists; char *name; + struct event_format *tp_format; union { void *priv; off_t id_offset; @@ -67,6 +68,10 @@ struct perf_evsel { } handler; unsigned int sample_size; bool supported; + /* parse modifier helper */ + int exclude_GH; + struct perf_evsel *leader; + char *group_name; }; struct cpu_map; @@ -106,14 +111,11 @@ void perf_evsel__free_id(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); int perf_evsel__open_per_cpu(struct perf_evsel *evsel, - struct cpu_map *cpus, bool group, - struct xyarray *group_fds); + struct cpu_map *cpus); int perf_evsel__open_per_thread(struct perf_evsel *evsel, - struct thread_map *threads, bool group, - struct xyarray *group_fds); + struct thread_map *threads); int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, - struct thread_map *threads, bool group, - struct xyarray *group_fds); + struct thread_map *threads); void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads); #define perf_evsel__match(evsel, t, c) \ @@ -182,4 
+184,9 @@ void hists__init(struct hists *hists); int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *sample, bool swapped); + +static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) +{ + return list_entry(evsel->node.next, struct perf_evsel, node); +} #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index f06f6fd148f8..389590c1ad21 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -21,4 +21,19 @@ do p }' "Documentation/perf-$cmd.txt" done + +echo "#ifndef NO_LIBELF_SUPPORT" +sed -n -e 's/^perf-\([^ ]*\)[ ].* full.*/\1/p' command-list.txt | +sort | +while read cmd +do + sed -n ' + /^NAME/,/perf-'"$cmd"'/H + ${ + x + s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ + p + }' "Documentation/perf-$cmd.txt" +done +echo "#endif /* NO_LIBELF_SUPPORT */" echo "};" diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 74ea3c2f8138..9696e64c9dbd 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -20,11 +20,12 @@ #include "symbol.h" #include "debug.h" #include "cpumap.h" +#include "pmu.h" static bool no_buildid_cache = false; -static int event_count; -static struct perf_trace_event_type *events; +static int trace_event_count; +static struct perf_trace_event_type *trace_events; static u32 header_argc; static const char **header_argv; @@ -36,24 +37,24 @@ int perf_header__push_event(u64 id, const char *name) if (strlen(name) > MAX_EVENT_NAME) pr_warning("Event %s will be truncated\n", name); - nevents = realloc(events, (event_count + 1) * sizeof(*events)); + nevents = realloc(trace_events, (trace_event_count + 1) * sizeof(*trace_events)); if (nevents == NULL) return -ENOMEM; - events = nevents; + trace_events = nevents; - memset(&events[event_count], 0, sizeof(struct perf_trace_event_type)); - events[event_count].event_id = id; - strncpy(events[event_count].name, 
name, MAX_EVENT_NAME - 1); - event_count++; + memset(&trace_events[trace_event_count], 0, sizeof(struct perf_trace_event_type)); + trace_events[trace_event_count].event_id = id; + strncpy(trace_events[trace_event_count].name, name, MAX_EVENT_NAME - 1); + trace_event_count++; return 0; } char *perf_header__find_event(u64 id) { int i; - for (i = 0 ; i < event_count; i++) { - if (events[i].event_id == id) - return events[i].name; + for (i = 0 ; i < trace_event_count; i++) { + if (trace_events[i].event_id == id) + return trace_events[i].name; } return NULL; } @@ -608,11 +609,11 @@ static int write_nrcpus(int fd, struct perf_header *h __used, static int write_event_desc(int fd, struct perf_header *h __used, struct perf_evlist *evlist) { - struct perf_evsel *attr; + struct perf_evsel *evsel; u32 nre = 0, nri, sz; int ret; - list_for_each_entry(attr, &evlist->entries, node) + list_for_each_entry(evsel, &evlist->entries, node) nre++; /* @@ -625,14 +626,14 @@ static int write_event_desc(int fd, struct perf_header *h __used, /* * size of perf_event_attr struct */ - sz = (u32)sizeof(attr->attr); + sz = (u32)sizeof(evsel->attr); ret = do_write(fd, &sz, sizeof(sz)); if (ret < 0) return ret; - list_for_each_entry(attr, &evlist->entries, node) { + list_for_each_entry(evsel, &evlist->entries, node) { - ret = do_write(fd, &attr->attr, sz); + ret = do_write(fd, &evsel->attr, sz); if (ret < 0) return ret; /* @@ -642,7 +643,7 @@ static int write_event_desc(int fd, struct perf_header *h __used, * copy into an nri to be independent of the * type of ids, */ - nri = attr->ids; + nri = evsel->ids; ret = do_write(fd, &nri, sizeof(nri)); if (ret < 0) return ret; @@ -650,13 +651,13 @@ static int write_event_desc(int fd, struct perf_header *h __used, /* * write event string as passed on cmdline */ - ret = do_write_string(fd, perf_evsel__name(attr)); + ret = do_write_string(fd, perf_evsel__name(evsel)); if (ret < 0) return ret; /* * write unique ids for this event */ - ret = do_write(fd, 
attr->id, attr->ids * sizeof(u64)); + ret = do_write(fd, evsel->id, evsel->ids * sizeof(u64)); if (ret < 0) return ret; } @@ -1004,6 +1005,45 @@ done: } /* + * File format: + * + * struct pmu_mappings { + * u32 pmu_num; + * struct pmu_map { + * u32 type; + * char name[]; + * }[pmu_num]; + * }; + */ + +static int write_pmu_mappings(int fd, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + struct perf_pmu *pmu = NULL; + off_t offset = lseek(fd, 0, SEEK_CUR); + __u32 pmu_num = 0; + + /* write real pmu_num later */ + do_write(fd, &pmu_num, sizeof(pmu_num)); + + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name) + continue; + pmu_num++; + do_write(fd, &pmu->type, sizeof(pmu->type)); + do_write_string(fd, pmu->name); + } + + if (pwrite(fd, &pmu_num, sizeof(pmu_num), offset) != sizeof(pmu_num)) { + /* discard all */ + lseek(fd, offset, SEEK_SET); + return -1; + } + + return 0; +} + +/* * default get_cpuid(): nothing gets recorded * actual implementation must be in arch/$(ARCH)/util/header.c */ @@ -1148,12 +1188,29 @@ static void print_cpu_topology(struct perf_header *ph, int fd, FILE *fp) } } -static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) +static void free_event_desc(struct perf_evsel *events) { - struct perf_event_attr attr; - uint64_t id; + struct perf_evsel *evsel; + + if (!events) + return; + + for (evsel = events; evsel->attr.size; evsel++) { + if (evsel->name) + free(evsel->name); + if (evsel->id) + free(evsel->id); + } + + free(events); +} + +static struct perf_evsel * +read_event_desc(struct perf_header *ph, int fd) +{ + struct perf_evsel *evsel, *events = NULL; + u64 *id; void *buf = NULL; - char *str; u32 nre, sz, nr, i, j; ssize_t ret; size_t msz; @@ -1173,18 +1230,22 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) if (ph->needs_swap) sz = bswap_32(sz); - memset(&attr, 0, sizeof(attr)); - /* buffer to hold on file attr struct */ buf = malloc(sz); if (!buf) goto error; - msz = 
sizeof(attr); + /* the last event terminates with evsel->attr.size == 0: */ + events = calloc(nre + 1, sizeof(*events)); + if (!events) + goto error; + + msz = sizeof(evsel->attr); if (sz < msz) msz = sz; - for (i = 0 ; i < nre; i++) { + for (i = 0, evsel = events; i < nre; evsel++, i++) { + evsel->idx = i; /* * must read entire on-file attr struct to @@ -1197,7 +1258,7 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) if (ph->needs_swap) perf_event__attr_swap(buf); - memcpy(&attr, buf, msz); + memcpy(&evsel->attr, buf, msz); ret = read(fd, &nr, sizeof(nr)); if (ret != (ssize_t)sizeof(nr)) @@ -1206,51 +1267,82 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) if (ph->needs_swap) nr = bswap_32(nr); - str = do_read_string(fd, ph); - fprintf(fp, "# event : name = %s, ", str); - free(str); + evsel->name = do_read_string(fd, ph); + + if (!nr) + continue; + + id = calloc(nr, sizeof(*id)); + if (!id) + goto error; + evsel->ids = nr; + evsel->id = id; + + for (j = 0 ; j < nr; j++) { + ret = read(fd, id, sizeof(*id)); + if (ret != (ssize_t)sizeof(*id)) + goto error; + if (ph->needs_swap) + *id = bswap_64(*id); + id++; + } + } +out: + if (buf) + free(buf); + return events; +error: + if (events) + free_event_desc(events); + events = NULL; + goto out; +} + +static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) +{ + struct perf_evsel *evsel, *events = read_event_desc(ph, fd); + u32 j; + u64 *id; + + if (!events) { + fprintf(fp, "# event desc: not available or unable to read\n"); + return; + } + + for (evsel = events; evsel->attr.size; evsel++) { + fprintf(fp, "# event : name = %s, ", evsel->name); fprintf(fp, "type = %d, config = 0x%"PRIx64 ", config1 = 0x%"PRIx64", config2 = 0x%"PRIx64, - attr.type, - (u64)attr.config, - (u64)attr.config1, - (u64)attr.config2); + evsel->attr.type, + (u64)evsel->attr.config, + (u64)evsel->attr.config1, + (u64)evsel->attr.config2); fprintf(fp, ", excl_usr = %d, excl_kern = %d", - 
attr.exclude_user, - attr.exclude_kernel); + evsel->attr.exclude_user, + evsel->attr.exclude_kernel); fprintf(fp, ", excl_host = %d, excl_guest = %d", - attr.exclude_host, - attr.exclude_guest); + evsel->attr.exclude_host, + evsel->attr.exclude_guest); - fprintf(fp, ", precise_ip = %d", attr.precise_ip); + fprintf(fp, ", precise_ip = %d", evsel->attr.precise_ip); - if (nr) + if (evsel->ids) { fprintf(fp, ", id = {"); - - for (j = 0 ; j < nr; j++) { - ret = read(fd, &id, sizeof(id)); - if (ret != (ssize_t)sizeof(id)) - goto error; - - if (ph->needs_swap) - id = bswap_64(id); - - if (j) - fputc(',', fp); - - fprintf(fp, " %"PRIu64, id); - } - if (nr && j == nr) + for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) { + if (j) + fputc(',', fp); + fprintf(fp, " %"PRIu64, *id); + } fprintf(fp, " }"); + } + fputc('\n', fp); } - free(buf); - return; -error: - fprintf(fp, "# event desc: not available or unable to read\n"); + + free_event_desc(events); } static void print_total_mem(struct perf_header *h __used, int fd, FILE *fp) @@ -1337,6 +1429,43 @@ static void print_branch_stack(struct perf_header *ph __used, int fd __used, fprintf(fp, "# contains samples with branch stack\n"); } +static void print_pmu_mappings(struct perf_header *ph, int fd, FILE *fp) +{ + const char *delimiter = "# pmu mappings: "; + char *name; + int ret; + u32 pmu_num; + u32 type; + + ret = read(fd, &pmu_num, sizeof(pmu_num)); + if (ret != sizeof(pmu_num)) + goto error; + + if (!pmu_num) { + fprintf(fp, "# pmu mappings: not available\n"); + return; + } + + while (pmu_num) { + if (read(fd, &type, sizeof(type)) != sizeof(type)) + break; + name = do_read_string(fd, ph); + if (!name) + break; + pmu_num--; + fprintf(fp, "%s%s = %" PRIu32, delimiter, name, type); + free(name); + delimiter = ", "; + } + + fprintf(fp, "\n"); + + if (!pmu_num) + return; +error: + fprintf(fp, "# pmu mappings: unable to read\n"); +} + static int __event_process_build_id(struct build_id_event *bev, char *filename, struct 
perf_session *session) @@ -1504,6 +1633,56 @@ static int process_build_id(struct perf_file_section *section, return 0; } +static struct perf_evsel * +perf_evlist__find_by_index(struct perf_evlist *evlist, int idx) +{ + struct perf_evsel *evsel; + + list_for_each_entry(evsel, &evlist->entries, node) { + if (evsel->idx == idx) + return evsel; + } + + return NULL; +} + +static void +perf_evlist__set_event_name(struct perf_evlist *evlist, struct perf_evsel *event) +{ + struct perf_evsel *evsel; + + if (!event->name) + return; + + evsel = perf_evlist__find_by_index(evlist, event->idx); + if (!evsel) + return; + + if (evsel->name) + return; + + evsel->name = strdup(event->name); +} + +static int +process_event_desc(struct perf_file_section *section __unused, + struct perf_header *header, int feat __unused, int fd, + void *data __used) +{ + struct perf_session *session = container_of(header, struct perf_session, header); + struct perf_evsel *evsel, *events = read_event_desc(header, fd); + + if (!events) + return 0; + + for (evsel = events; evsel->attr.size; evsel++) + perf_evlist__set_event_name(session->evlist, evsel); + + free_event_desc(events); + + return 0; +} + struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); @@ -1537,11 +1716,12 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPA(HEADER_CPUDESC, cpudesc), FEAT_OPA(HEADER_CPUID, cpuid), FEAT_OPA(HEADER_TOTAL_MEM, total_mem), - FEAT_OPA(HEADER_EVENT_DESC, event_desc), + FEAT_OPP(HEADER_EVENT_DESC, event_desc), FEAT_OPA(HEADER_CMDLINE, cmdline), FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), + FEAT_OPA(HEADER_PMU_MAPPINGS, pmu_mappings), }; struct header_print_data { @@ -1683,17 +1863,17 @@ int perf_session__write_header(struct perf_session *session, struct perf_file_header f_header; struct perf_file_attr 
f_attr; struct perf_header *header = &session->header; - struct perf_evsel *attr, *pair = NULL; + struct perf_evsel *evsel, *pair = NULL; int err; lseek(fd, sizeof(f_header), SEEK_SET); if (session->evlist != evlist) - pair = list_entry(session->evlist->entries.next, struct perf_evsel, node); + pair = perf_evlist__first(session->evlist); - list_for_each_entry(attr, &evlist->entries, node) { - attr->id_offset = lseek(fd, 0, SEEK_CUR); - err = do_write(fd, attr->id, attr->ids * sizeof(u64)); + list_for_each_entry(evsel, &evlist->entries, node) { + evsel->id_offset = lseek(fd, 0, SEEK_CUR); + err = do_write(fd, evsel->id, evsel->ids * sizeof(u64)); if (err < 0) { out_err_write: pr_debug("failed to write perf header\n"); @@ -1703,19 +1883,19 @@ out_err_write: err = do_write(fd, pair->id, pair->ids * sizeof(u64)); if (err < 0) goto out_err_write; - attr->ids += pair->ids; - pair = list_entry(pair->node.next, struct perf_evsel, node); + evsel->ids += pair->ids; + pair = perf_evsel__next(pair); } } header->attr_offset = lseek(fd, 0, SEEK_CUR); - list_for_each_entry(attr, &evlist->entries, node) { + list_for_each_entry(evsel, &evlist->entries, node) { f_attr = (struct perf_file_attr){ - .attr = attr->attr, + .attr = evsel->attr, .ids = { - .offset = attr->id_offset, - .size = attr->ids * sizeof(u64), + .offset = evsel->id_offset, + .size = evsel->ids * sizeof(u64), } }; err = do_write(fd, &f_attr, sizeof(f_attr)); @@ -1726,9 +1906,9 @@ out_err_write: } header->event_offset = lseek(fd, 0, SEEK_CUR); - header->event_size = event_count * sizeof(struct perf_trace_event_type); - if (events) { - err = do_write(fd, events, header->event_size); + header->event_size = trace_event_count * sizeof(struct perf_trace_event_type); + if (trace_events) { + err = do_write(fd, trace_events, header->event_size); if (err < 0) { pr_debug("failed to write perf header events\n"); return err; @@ -1829,6 +2009,8 @@ out_free: static const int attr_file_abi_sizes[] = { [0] = PERF_ATTR_SIZE_VER0, [1] 
= PERF_ATTR_SIZE_VER1, + [2] = PERF_ATTR_SIZE_VER2, + [3] = PERF_ATTR_SIZE_VER3, 0, }; @@ -2123,6 +2305,7 @@ static int perf_evsel__set_tracepoint_name(struct perf_evsel *evsel, if (event->name == NULL) return -1; + evsel->tp_format = event; return 0; } @@ -2207,13 +2390,13 @@ int perf_session__read_header(struct perf_session *session, int fd) if (f_header.event_types.size) { lseek(fd, f_header.event_types.offset, SEEK_SET); - events = malloc(f_header.event_types.size); - if (events == NULL) + trace_events = malloc(f_header.event_types.size); + if (trace_events == NULL) return -ENOMEM; - if (perf_header__getbuffer64(header, fd, events, + if (perf_header__getbuffer64(header, fd, trace_events, f_header.event_types.size)) goto out_errno; - event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); + trace_event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } perf_header__process_sections(header, fd, &session->pevent, @@ -2236,7 +2419,7 @@ out_delete_evlist: } int perf_event__synthesize_attr(struct perf_tool *tool, - struct perf_event_attr *attr, u16 ids, u64 *id, + struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process) { union perf_event *ev; @@ -2257,9 +2440,12 @@ int perf_event__synthesize_attr(struct perf_tool *tool, memcpy(ev->attr.id, id, ids * sizeof(u64)); ev->attr.header.type = PERF_RECORD_HEADER_ATTR; - ev->attr.header.size = size; + ev->attr.header.size = (u16)size; - err = process(tool, ev, NULL, NULL); + if (ev->attr.header.size == size) + err = process(tool, ev, NULL, NULL); + else + err = -E2BIG; free(ev); @@ -2270,12 +2456,12 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, struct perf_session *session, perf_event__handler_t process) { - struct perf_evsel *attr; + struct perf_evsel *evsel; int err = 0; - list_for_each_entry(attr, &session->evlist->entries, node) { - err = perf_event__synthesize_attr(tool, &attr->attr, attr->ids, - attr->id, process); + 
list_for_each_entry(evsel, &session->evlist->entries, node) { + err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids, + evsel->id, process); if (err) { pr_debug("failed to create perf header attribute\n"); return err; @@ -2288,7 +2474,7 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, int perf_event__process_attr(union perf_event *event, struct perf_evlist **pevlist) { - unsigned int i, ids, n_ids; + u32 i, ids, n_ids; struct perf_evsel *evsel; struct perf_evlist *evlist = *pevlist; @@ -2355,8 +2541,8 @@ int perf_event__synthesize_event_types(struct perf_tool *tool, struct perf_trace_event_type *type; int i, err = 0; - for (i = 0; i < event_count; i++) { - type = &events[i]; + for (i = 0; i < trace_event_count; i++) { + type = &trace_events[i]; err = perf_event__synthesize_event_type(tool, type->event_id, type->name, process, @@ -2452,6 +2638,8 @@ int perf_event__process_tracing_data(union perf_event *event, if (size_read + padding != size) die("tracing data size mismatch"); + perf_evlist__set_tracepoint_names(session->evlist, session->pevent); + return size_read + padding; } diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 2d42b3e1826f..9d5eedceda72 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -28,6 +28,7 @@ enum { HEADER_CPU_TOPOLOGY, HEADER_NUMA_TOPOLOGY, HEADER_BRANCH_STACK, + HEADER_PMU_MAPPINGS, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -99,7 +100,7 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); int perf_event__synthesize_attr(struct perf_tool *tool, - struct perf_event_attr *attr, u16 ids, u64 *id, + struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); int perf_event__synthesize_attrs(struct perf_tool *tool, struct perf_session *session, diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f247ef2789a4..b1817f15bb87 100644 --- 
a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -45,7 +45,7 @@ bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len) return false; } -static void hists__reset_col_len(struct hists *hists) +void hists__reset_col_len(struct hists *hists) { enum hist_column col; @@ -63,7 +63,7 @@ static void hists__set_unres_dso_col_len(struct hists *hists, int dso) hists__set_col_len(hists, dso, unresolved_col_width); } -static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) +void hists__calc_col_len(struct hists *hists, struct hist_entry *h) { const unsigned int unresolved_col_width = BITS_PER_LONG / 4; u16 len; @@ -114,6 +114,22 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) } } +void hists__output_recalc_col_len(struct hists *hists, int max_rows) +{ + struct rb_node *next = rb_first(&hists->entries); + struct hist_entry *n; + int row = 0; + + hists__reset_col_len(hists); + + while (next && row++ < max_rows) { + n = rb_entry(next, struct hist_entry, rb_node); + if (!n->filtered) + hists__calc_col_len(hists, n); + next = rb_next(&n->rb_node); + } +} + static void hist_entry__add_cpumode_period(struct hist_entry *he, unsigned int cpumode, u64 period) { @@ -547,641 +563,6 @@ void hists__output_resort_threaded(struct hists *hists) return __hists__output_resort(hists, true); } -static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) -{ - int i; - int ret = fprintf(fp, " "); - - for (i = 0; i < left_margin; i++) - ret += fprintf(fp, " "); - - return ret; -} - -static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, - int left_margin) -{ - int i; - size_t ret = callchain__fprintf_left_margin(fp, left_margin); - - for (i = 0; i < depth; i++) - if (depth_mask & (1 << i)) - ret += fprintf(fp, "| "); - else - ret += fprintf(fp, " "); - - ret += fprintf(fp, "\n"); - - return ret; -} - -static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, - int depth, 
int depth_mask, int period, - u64 total_samples, u64 hits, - int left_margin) -{ - int i; - size_t ret = 0; - - ret += callchain__fprintf_left_margin(fp, left_margin); - for (i = 0; i < depth; i++) { - if (depth_mask & (1 << i)) - ret += fprintf(fp, "|"); - else - ret += fprintf(fp, " "); - if (!period && i == depth - 1) { - double percent; - - percent = hits * 100.0 / total_samples; - ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent); - } else - ret += fprintf(fp, "%s", " "); - } - if (chain->ms.sym) - ret += fprintf(fp, "%s\n", chain->ms.sym->name); - else - ret += fprintf(fp, "0x%0" PRIx64 "\n", chain->ip); - - return ret; -} - -static struct symbol *rem_sq_bracket; -static struct callchain_list rem_hits; - -static void init_rem_hits(void) -{ - rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6); - if (!rem_sq_bracket) { - fprintf(stderr, "Not enough memory to display remaining hits\n"); - return; - } - - strcpy(rem_sq_bracket->name, "[...]"); - rem_hits.ms.sym = rem_sq_bracket; -} - -static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, - u64 total_samples, int depth, - int depth_mask, int left_margin) -{ - struct rb_node *node, *next; - struct callchain_node *child; - struct callchain_list *chain; - int new_depth_mask = depth_mask; - u64 remaining; - size_t ret = 0; - int i; - uint entries_printed = 0; - - remaining = total_samples; - - node = rb_first(root); - while (node) { - u64 new_total; - u64 cumul; - - child = rb_entry(node, struct callchain_node, rb_node); - cumul = callchain_cumul_hits(child); - remaining -= cumul; - - /* - * The depth mask manages the output of pipes that show - * the depth. We don't want to keep the pipes of the current - * level for the last child of this depth. - * Except if we have remaining filtered hits. 
They will - * supersede the last child - */ - next = rb_next(node); - if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining)) - new_depth_mask &= ~(1 << (depth - 1)); - - /* - * But we keep the older depth mask for the line separator - * to keep the level link until we reach the last child - */ - ret += ipchain__fprintf_graph_line(fp, depth, depth_mask, - left_margin); - i = 0; - list_for_each_entry(chain, &child->val, list) { - ret += ipchain__fprintf_graph(fp, chain, depth, - new_depth_mask, i++, - total_samples, - cumul, - left_margin); - } - - if (callchain_param.mode == CHAIN_GRAPH_REL) - new_total = child->children_hit; - else - new_total = total_samples; - - ret += __callchain__fprintf_graph(fp, &child->rb_root, new_total, - depth + 1, - new_depth_mask | (1 << depth), - left_margin); - node = next; - if (++entries_printed == callchain_param.print_limit) - break; - } - - if (callchain_param.mode == CHAIN_GRAPH_REL && - remaining && remaining != total_samples) { - - if (!rem_sq_bracket) - return ret; - - new_depth_mask &= ~(1 << (depth - 1)); - ret += ipchain__fprintf_graph(fp, &rem_hits, depth, - new_depth_mask, 0, total_samples, - remaining, left_margin); - } - - return ret; -} - -static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, - u64 total_samples, int left_margin) -{ - struct callchain_node *cnode; - struct callchain_list *chain; - u32 entries_printed = 0; - bool printed = false; - struct rb_node *node; - int i = 0; - int ret = 0; - - /* - * If have one single callchain root, don't bother printing - * its percentage (100 % in fractal mode and the same percentage - * than the hist in graph mode). This also avoid one level of column. - */ - node = rb_first(root); - if (node && !rb_next(node)) { - cnode = rb_entry(node, struct callchain_node, rb_node); - list_for_each_entry(chain, &cnode->val, list) { - /* - * If we sort by symbol, the first entry is the same than - * the symbol. 
No need to print it otherwise it appears as - * displayed twice. - */ - if (!i++ && sort__first_dimension == SORT_SYM) - continue; - if (!printed) { - ret += callchain__fprintf_left_margin(fp, left_margin); - ret += fprintf(fp, "|\n"); - ret += callchain__fprintf_left_margin(fp, left_margin); - ret += fprintf(fp, "---"); - left_margin += 3; - printed = true; - } else - ret += callchain__fprintf_left_margin(fp, left_margin); - - if (chain->ms.sym) - ret += fprintf(fp, " %s\n", chain->ms.sym->name); - else - ret += fprintf(fp, " %p\n", (void *)(long)chain->ip); - - if (++entries_printed == callchain_param.print_limit) - break; - } - root = &cnode->rb_root; - } - - ret += __callchain__fprintf_graph(fp, root, total_samples, - 1, 1, left_margin); - ret += fprintf(fp, "\n"); - - return ret; -} - -static size_t __callchain__fprintf_flat(FILE *fp, - struct callchain_node *self, - u64 total_samples) -{ - struct callchain_list *chain; - size_t ret = 0; - - if (!self) - return 0; - - ret += __callchain__fprintf_flat(fp, self->parent, total_samples); - - - list_for_each_entry(chain, &self->val, list) { - if (chain->ip >= PERF_CONTEXT_MAX) - continue; - if (chain->ms.sym) - ret += fprintf(fp, " %s\n", chain->ms.sym->name); - else - ret += fprintf(fp, " %p\n", - (void *)(long)chain->ip); - } - - return ret; -} - -static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *self, - u64 total_samples) -{ - size_t ret = 0; - u32 entries_printed = 0; - struct rb_node *rb_node; - struct callchain_node *chain; - - rb_node = rb_first(self); - while (rb_node) { - double percent; - - chain = rb_entry(rb_node, struct callchain_node, rb_node); - percent = chain->hit * 100.0 / total_samples; - - ret = percent_color_fprintf(fp, " %6.2f%%\n", percent); - ret += __callchain__fprintf_flat(fp, chain, total_samples); - ret += fprintf(fp, "\n"); - if (++entries_printed == callchain_param.print_limit) - break; - - rb_node = rb_next(rb_node); - } - - return ret; -} - -static size_t 
hist_entry_callchain__fprintf(struct hist_entry *he, - u64 total_samples, int left_margin, - FILE *fp) -{ - switch (callchain_param.mode) { - case CHAIN_GRAPH_REL: - return callchain__fprintf_graph(fp, &he->sorted_chain, he->period, - left_margin); - break; - case CHAIN_GRAPH_ABS: - return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, - left_margin); - break; - case CHAIN_FLAT: - return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); - break; - case CHAIN_NONE: - break; - default: - pr_err("Bad callchain mode\n"); - } - - return 0; -} - -void hists__output_recalc_col_len(struct hists *hists, int max_rows) -{ - struct rb_node *next = rb_first(&hists->entries); - struct hist_entry *n; - int row = 0; - - hists__reset_col_len(hists); - - while (next && row++ < max_rows) { - n = rb_entry(next, struct hist_entry, rb_node); - if (!n->filtered) - hists__calc_col_len(hists, n); - next = rb_next(&n->rb_node); - } -} - -static int hist_entry__pcnt_snprintf(struct hist_entry *he, char *s, - size_t size, struct hists *pair_hists, - bool show_displacement, long displacement, - bool color, u64 total_period) -{ - u64 period, total, period_sys, period_us, period_guest_sys, period_guest_us; - u64 nr_events; - const char *sep = symbol_conf.field_sep; - int ret; - - if (symbol_conf.exclude_other && !he->parent) - return 0; - - if (pair_hists) { - period = he->pair ? he->pair->period : 0; - nr_events = he->pair ? he->pair->nr_events : 0; - total = pair_hists->stats.total_period; - period_sys = he->pair ? he->pair->period_sys : 0; - period_us = he->pair ? he->pair->period_us : 0; - period_guest_sys = he->pair ? he->pair->period_guest_sys : 0; - period_guest_us = he->pair ? 
he->pair->period_guest_us : 0; - } else { - period = he->period; - nr_events = he->nr_events; - total = total_period; - period_sys = he->period_sys; - period_us = he->period_us; - period_guest_sys = he->period_guest_sys; - period_guest_us = he->period_guest_us; - } - - if (total) { - if (color) - ret = percent_color_snprintf(s, size, - sep ? "%.2f" : " %6.2f%%", - (period * 100.0) / total); - else - ret = scnprintf(s, size, sep ? "%.2f" : " %6.2f%%", - (period * 100.0) / total); - if (symbol_conf.show_cpu_utilization) { - ret += percent_color_snprintf(s + ret, size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_sys * 100.0) / total); - ret += percent_color_snprintf(s + ret, size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_us * 100.0) / total); - if (perf_guest) { - ret += percent_color_snprintf(s + ret, - size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_guest_sys * 100.0) / - total); - ret += percent_color_snprintf(s + ret, - size - ret, - sep ? "%.2f" : " %6.2f%%", - (period_guest_us * 100.0) / - total); - } - } - } else - ret = scnprintf(s, size, sep ? 
"%" PRIu64 : "%12" PRIu64 " ", period); - - if (symbol_conf.show_nr_samples) { - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, nr_events); - else - ret += scnprintf(s + ret, size - ret, "%11" PRIu64, nr_events); - } - - if (symbol_conf.show_total_period) { - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%" PRIu64, *sep, period); - else - ret += scnprintf(s + ret, size - ret, " %12" PRIu64, period); - } - - if (pair_hists) { - char bf[32]; - double old_percent = 0, new_percent = 0, diff; - - if (total > 0) - old_percent = (period * 100.0) / total; - if (total_period > 0) - new_percent = (he->period * 100.0) / total_period; - - diff = new_percent - old_percent; - - if (fabs(diff) >= 0.01) - scnprintf(bf, sizeof(bf), "%+4.2F%%", diff); - else - scnprintf(bf, sizeof(bf), " "); - - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); - else - ret += scnprintf(s + ret, size - ret, "%11.11s", bf); - - if (show_displacement) { - if (displacement) - scnprintf(bf, sizeof(bf), "%+4ld", displacement); - else - scnprintf(bf, sizeof(bf), " "); - - if (sep) - ret += scnprintf(s + ret, size - ret, "%c%s", *sep, bf); - else - ret += scnprintf(s + ret, size - ret, "%6.6s", bf); - } - } - - return ret; -} - -int hist_entry__snprintf(struct hist_entry *he, char *s, size_t size, - struct hists *hists) -{ - const char *sep = symbol_conf.field_sep; - struct sort_entry *se; - int ret = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - if (se->elide) - continue; - - ret += scnprintf(s + ret, size - ret, "%s", sep ?: " "); - ret += se->se_snprintf(he, s + ret, size - ret, - hists__col_len(hists, se->se_width_idx)); - } - - return ret; -} - -static int hist_entry__fprintf(struct hist_entry *he, size_t size, - struct hists *hists, struct hists *pair_hists, - bool show_displacement, long displacement, - u64 total_period, FILE *fp) -{ - char bf[512]; - int ret; - - if (size == 0 || size > sizeof(bf)) - size = sizeof(bf); - - ret = 
hist_entry__pcnt_snprintf(he, bf, size, pair_hists, - show_displacement, displacement, - true, total_period); - hist_entry__snprintf(he, bf + ret, size - ret, hists); - return fprintf(fp, "%s\n", bf); -} - -static size_t hist_entry__fprintf_callchain(struct hist_entry *he, - struct hists *hists, - u64 total_period, FILE *fp) -{ - int left_margin = 0; - - if (sort__first_dimension == SORT_COMM) { - struct sort_entry *se = list_first_entry(&hist_entry__sort_list, - typeof(*se), list); - left_margin = hists__col_len(hists, se->se_width_idx); - left_margin -= thread__comm_len(he->thread); - } - - return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); -} - -size_t hists__fprintf(struct hists *hists, struct hists *pair, - bool show_displacement, bool show_header, int max_rows, - int max_cols, FILE *fp) -{ - struct sort_entry *se; - struct rb_node *nd; - size_t ret = 0; - u64 total_period; - unsigned long position = 1; - long displacement = 0; - unsigned int width; - const char *sep = symbol_conf.field_sep; - const char *col_width = symbol_conf.col_width_list_str; - int nr_rows = 0; - - init_rem_hits(); - - if (!show_header) - goto print_entries; - - fprintf(fp, "# %s", pair ? 
"Baseline" : "Overhead"); - - if (symbol_conf.show_cpu_utilization) { - if (sep) { - ret += fprintf(fp, "%csys", *sep); - ret += fprintf(fp, "%cus", *sep); - if (perf_guest) { - ret += fprintf(fp, "%cguest sys", *sep); - ret += fprintf(fp, "%cguest us", *sep); - } - } else { - ret += fprintf(fp, " sys "); - ret += fprintf(fp, " us "); - if (perf_guest) { - ret += fprintf(fp, " guest sys "); - ret += fprintf(fp, " guest us "); - } - } - } - - if (symbol_conf.show_nr_samples) { - if (sep) - fprintf(fp, "%cSamples", *sep); - else - fputs(" Samples ", fp); - } - - if (symbol_conf.show_total_period) { - if (sep) - ret += fprintf(fp, "%cPeriod", *sep); - else - ret += fprintf(fp, " Period "); - } - - if (pair) { - if (sep) - ret += fprintf(fp, "%cDelta", *sep); - else - ret += fprintf(fp, " Delta "); - - if (show_displacement) { - if (sep) - ret += fprintf(fp, "%cDisplacement", *sep); - else - ret += fprintf(fp, " Displ"); - } - } - - list_for_each_entry(se, &hist_entry__sort_list, list) { - if (se->elide) - continue; - if (sep) { - fprintf(fp, "%c%s", *sep, se->se_header); - continue; - } - width = strlen(se->se_header); - if (symbol_conf.col_width_list_str) { - if (col_width) { - hists__set_col_len(hists, se->se_width_idx, - atoi(col_width)); - col_width = strchr(col_width, ','); - if (col_width) - ++col_width; - } - } - if (!hists__new_col_len(hists, se->se_width_idx, width)) - width = hists__col_len(hists, se->se_width_idx); - fprintf(fp, " %*s", width, se->se_header); - } - - fprintf(fp, "\n"); - if (max_rows && ++nr_rows >= max_rows) - goto out; - - if (sep) - goto print_entries; - - fprintf(fp, "# ........"); - if (symbol_conf.show_cpu_utilization) - fprintf(fp, " ....... 
......."); - if (symbol_conf.show_nr_samples) - fprintf(fp, " .........."); - if (symbol_conf.show_total_period) - fprintf(fp, " ............"); - if (pair) { - fprintf(fp, " .........."); - if (show_displacement) - fprintf(fp, " ....."); - } - list_for_each_entry(se, &hist_entry__sort_list, list) { - unsigned int i; - - if (se->elide) - continue; - - fprintf(fp, " "); - width = hists__col_len(hists, se->se_width_idx); - if (width == 0) - width = strlen(se->se_header); - for (i = 0; i < width; i++) - fprintf(fp, "."); - } - - fprintf(fp, "\n"); - if (max_rows && ++nr_rows >= max_rows) - goto out; - - fprintf(fp, "#\n"); - if (max_rows && ++nr_rows >= max_rows) - goto out; - -print_entries: - total_period = hists->stats.total_period; - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (h->filtered) - continue; - - if (show_displacement) { - if (h->pair != NULL) - displacement = ((long)h->pair->position - - (long)position); - else - displacement = 0; - ++position; - } - ret += hist_entry__fprintf(h, max_cols, hists, pair, show_displacement, - displacement, total_period, fp); - - if (symbol_conf.use_callchain) - ret += hist_entry__fprintf_callchain(h, hists, total_period, fp); - if (max_rows && ++nr_rows >= max_rows) - goto out; - - if (h->ms.map == NULL && verbose > 1) { - __map_groups__fprintf_maps(&h->thread->mg, - MAP__FUNCTION, verbose, fp); - fprintf(fp, "%.10s end\n", graph_dotted_line); - } - } -out: - free(rem_sq_bracket); - - return ret; -} - /* * See hists__fprintf to match the column widths */ @@ -1342,25 +723,3 @@ void hists__inc_nr_events(struct hists *hists, u32 type) ++hists->stats.nr_events[0]; ++hists->stats.nr_events[type]; } - -size_t hists__fprintf_nr_events(struct hists *hists, FILE *fp) -{ - int i; - size_t ret = 0; - - for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { - const char *name; - - if (hists->stats.nr_events[i] == 0) - continue; - - name = 
perf_event__name(i); - if (!strcmp(name, "UNKNOWN")) - continue; - - ret += fprintf(fp, "%16s events: %10d\n", name, - hists->stats.nr_events[i]); - } - - return ret; -} diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 0b096c27a419..2e650ffb7d23 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -75,8 +75,8 @@ struct hist_entry *__hists__add_entry(struct hists *self, struct symbol *parent, u64 period); int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); -int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, - struct hists *hists); +int hist_entry__sort_snprintf(struct hist_entry *self, char *bf, size_t size, + struct hists *hists); void hist_entry__free(struct hist_entry *); struct hist_entry *__hists__add_branch_entry(struct hists *self, @@ -112,6 +112,8 @@ void hists__filter_by_symbol(struct hists *hists); u16 hists__col_len(struct hists *self, enum hist_column col); void hists__set_col_len(struct hists *self, enum hist_column col, u16 len); bool hists__new_col_len(struct hists *self, enum hist_column col, u16 len); +void hists__reset_col_len(struct hists *hists); +void hists__calc_col_len(struct hists *hists, struct hist_entry *he); struct perf_evlist; diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h index 547628e97f3d..2dc867128e46 100644 --- a/tools/perf/util/include/linux/compiler.h +++ b/tools/perf/util/include/linux/compiler.h @@ -10,5 +10,6 @@ #endif #define __used __attribute__((__unused__)) +#define __packed __attribute__((__packed__)) #endif diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index cc33486ad9e2..7d37159c1e99 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -86,6 +86,25 @@ out_delete: return NULL; } +/* + * Constructor variant for modules (where we know from /proc/modules where + * they are loaded) and for 
vmlinux, where only after we load all the + * symbols we'll know where it starts and ends. + */ +struct map *map__new2(u64 start, struct dso *dso, enum map_type type) +{ + struct map *map = calloc(1, (sizeof(*map) + + (dso->kernel ? sizeof(struct kmap) : 0))); + if (map != NULL) { + /* + * ->end will be filled after we load all the symbols + */ + map__init(map, type, start, 0, 0, dso); + } + + return map; +} + void map__delete(struct map *self) { free(self); @@ -137,6 +156,7 @@ int map__load(struct map *self, symbol_filter_t filter) pr_warning(", continuing without symbols\n"); return -1; } else if (nr == 0) { +#ifndef NO_LIBELF_SUPPORT const size_t len = strlen(name); const size_t real_len = len - sizeof(DSO__DELETED); @@ -149,7 +169,7 @@ int map__load(struct map *self, symbol_filter_t filter) pr_warning("no symbols found in %s, maybe install " "a debug package?\n", name); } - +#endif return -1; } /* @@ -242,14 +262,6 @@ u64 map__rip_2objdump(struct map *map, u64 rip) return addr; } -u64 map__objdump_2ip(struct map *map, u64 addr) -{ - u64 ip = map->dso->adjust_symbols ? 
- addr : - map->unmap_ip(map, addr); /* RIP -> IP */ - return ip; -} - void map_groups__init(struct map_groups *mg) { int i; diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 03a1e9b08b21..25ab4cdbc446 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -104,7 +104,6 @@ static inline u64 identity__map_ip(struct map *map __used, u64 ip) /* rip/ip <-> addr suitable for passing to `objdump --start-address=` */ u64 map__rip_2objdump(struct map *map, u64 rip); -u64 map__objdump_2ip(struct map *map, u64 addr); struct symbol; @@ -115,6 +114,7 @@ void map__init(struct map *self, enum map_type type, struct map *map__new(struct list_head *dsos__list, u64 start, u64 len, u64 pgoff, u32 pid, char *filename, enum map_type type); +struct map *map__new2(u64 start, struct dso *dso, enum map_type type); void map__delete(struct map *self); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); @@ -157,9 +157,12 @@ int machine__init(struct machine *self, const char *root_dir, pid_t pid); void machine__exit(struct machine *self); void machine__delete(struct machine *self); +struct perf_evsel; +struct perf_sample; int machine__resolve_callchain(struct machine *machine, + struct perf_evsel *evsel, struct thread *thread, - struct ip_callchain *chain, + struct perf_sample *sample, struct symbol **parent); int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name, u64 addr); diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 127d648cc548..bc8b65130ae0 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -18,8 +18,7 @@ do { \ static int test__checkevent_tracepoint(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); 
TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); @@ -48,8 +47,7 @@ static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist) static int test__checkevent_raw(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); @@ -59,8 +57,7 @@ static int test__checkevent_raw(struct perf_evlist *evlist) static int test__checkevent_numeric(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); @@ -70,8 +67,7 @@ static int test__checkevent_numeric(struct perf_evlist *evlist) static int test__checkevent_symbolic_name(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); @@ -82,8 +78,7 @@ static int test__checkevent_symbolic_name(struct perf_evlist *evlist) static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); @@ -100,8 +95,7 @@ static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist) static int test__checkevent_symbolic_alias(struct perf_evlist *evlist) { - struct 
perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); @@ -112,8 +106,7 @@ static int test__checkevent_symbolic_alias(struct perf_evlist *evlist) static int test__checkevent_genhw(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type); @@ -123,8 +116,7 @@ static int test__checkevent_genhw(struct perf_evlist *evlist) static int test__checkevent_breakpoint(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); @@ -138,8 +130,7 @@ static int test__checkevent_breakpoint(struct perf_evlist *evlist) static int test__checkevent_breakpoint_x(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); @@ -152,8 +143,7 @@ static int test__checkevent_breakpoint_x(struct perf_evlist *evlist) static int test__checkevent_breakpoint_r(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); 
TEST_ASSERT_VAL("wrong type", @@ -168,8 +158,7 @@ static int test__checkevent_breakpoint_r(struct perf_evlist *evlist) static int test__checkevent_breakpoint_w(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", @@ -184,8 +173,7 @@ static int test__checkevent_breakpoint_w(struct perf_evlist *evlist) static int test__checkevent_breakpoint_rw(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", @@ -200,8 +188,7 @@ static int test__checkevent_breakpoint_rw(struct perf_evlist *evlist) static int test__checkevent_tracepoint_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); @@ -232,8 +219,7 @@ test__checkevent_tracepoint_multi_modifier(struct perf_evlist *evlist) static int test__checkevent_raw_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); @@ -245,8 +231,7 @@ static int test__checkevent_raw_modifier(struct perf_evlist *evlist) static int test__checkevent_numeric_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct 
perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); @@ -258,8 +243,7 @@ static int test__checkevent_numeric_modifier(struct perf_evlist *evlist) static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); @@ -271,8 +255,7 @@ static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist) static int test__checkevent_exclude_host_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); @@ -282,8 +265,7 @@ static int test__checkevent_exclude_host_modifier(struct perf_evlist *evlist) static int test__checkevent_exclude_guest_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); @@ -293,8 +275,7 @@ static int test__checkevent_exclude_guest_modifier(struct perf_evlist *evlist) static int test__checkevent_symbolic_alias_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", 
evsel->attr.exclude_kernel); @@ -306,8 +287,7 @@ static int test__checkevent_symbolic_alias_modifier(struct perf_evlist *evlist) static int test__checkevent_genhw_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); @@ -319,75 +299,71 @@ static int test__checkevent_genhw_modifier(struct perf_evlist *evlist) static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:rw:u")); + !strcmp(perf_evsel__name(evsel), "mem:0:u")); return test__checkevent_breakpoint(evlist); } static int test__checkevent_breakpoint_x_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:x:k")); + !strcmp(perf_evsel__name(evsel), "mem:0:x:k")); return test__checkevent_breakpoint_x(evlist); } static int test__checkevent_breakpoint_r_modifier(struct perf_evlist *evlist) { - struct 
perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:r:hp")); + !strcmp(perf_evsel__name(evsel), "mem:0:r:hp")); return test__checkevent_breakpoint_r(evlist); } static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:w:up")); + !strcmp(perf_evsel__name(evsel), "mem:0:w:up")); return test__checkevent_breakpoint_w(evlist); } static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "mem:0x0:rw:kp")); + !strcmp(perf_evsel__name(evsel), "mem:0:rw:kp")); return test__checkevent_breakpoint_rw(evlist); } @@ -395,8 +371,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct 
perf_evlist *evlist) static int test__checkevent_pmu(struct perf_evlist *evlist) { - struct perf_evsel *evsel = list_entry(evlist->entries.next, - struct perf_evsel, node); + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); @@ -410,12 +385,11 @@ static int test__checkevent_pmu(struct perf_evlist *evlist) static int test__checkevent_list(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries); /* r1 */ - evsel = list_entry(evlist->entries.next, struct perf_evsel, node); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); TEST_ASSERT_VAL("wrong config1", 0 == evsel->attr.config1); @@ -426,7 +400,7 @@ static int test__checkevent_list(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); /* syscalls:sys_enter_open:k */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); TEST_ASSERT_VAL("wrong sample_type", PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type); @@ -437,7 +411,7 @@ static int test__checkevent_list(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); /* 1:1:hp */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); @@ -450,22 +424,21 @@ static int test__checkevent_list(struct perf_evlist *evlist) static int test__checkevent_pmu_name(struct perf_evlist *evlist) { - struct perf_evsel *evsel; + struct perf_evsel *evsel = 
perf_evlist__first(evlist); /* cpu/config=1,name=krava/u */ - evsel = list_entry(evlist->entries.next, struct perf_evsel, node); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava")); /* cpu/config=2/u" */ - evsel = list_entry(evsel->node.next, struct perf_evsel, node); + evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config); TEST_ASSERT_VAL("wrong name", - !strcmp(perf_evsel__name(evsel), "raw 0x2:u")); + !strcmp(perf_evsel__name(evsel), "cpu/config=2/u")); return 0; } @@ -513,6 +486,280 @@ static int test__checkterms_simple(struct list_head *terms) return 0; } +static int test__group1(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + + /* instructions:k */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* cycles:upp */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + 
PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + return 0; +} + +static int test__group2(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries); + + /* faults + :ku modifier */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_SW_PAGE_FAULTS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* cache-references + :u modifier */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CACHE_REFERENCES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + 
TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + /* cycles:k */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + return 0; +} + +static int test__group3(struct perf_evlist *evlist __used) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries); + + /* group1 syscalls:sys_enter_open:H */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong sample_type", + PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type); + TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + TEST_ASSERT_VAL("wrong group name", + !strcmp(leader->group_name, "group1")); + + /* group1 cycles:kppp */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + 
TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 3); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + + /* group2 cycles + G modifier */ + evsel = leader = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + TEST_ASSERT_VAL("wrong group name", + !strcmp(leader->group_name, "group2")); + + /* group2 1:3 + G modifier */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 3 == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + 
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + /* instructions:u */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + return 0; +} + +static int test__group4(struct perf_evlist *evlist __used) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); + + /* cycles:u + p */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 1); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* instructions:kp + p */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + 
TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + return 0; +} + +static int test__group5(struct perf_evlist *evlist __used) +{ + struct perf_evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries); + + /* cycles + G */ + evsel = leader = perf_evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* instructions + G */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", 
evsel->leader == leader); + + /* cycles:G */ + evsel = leader = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + /* instructions:G */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + /* cycles */ + evsel = perf_evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", 
!evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + TEST_ASSERT_VAL("wrong leader", evsel->leader == NULL); + + return 0; +} + struct test__event_st { const char *name; __u32 type; @@ -632,6 +879,26 @@ static struct test__event_st test__events[] = { .name = "mem:0:rw:kp", .check = test__checkevent_breakpoint_rw_modifier, }, + [28] = { + .name = "{instructions:k,cycles:upp}", + .check = test__group1, + }, + [29] = { + .name = "{faults:k,cache-references}:u,cycles:k", + .check = test__group2, + }, + [30] = { + .name = "group1{syscalls:sys_enter_open:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u", + .check = test__group3, + }, + [31] = { + .name = "{cycles:u,instructions:kp}:p", + .check = test__group4, + }, + [32] = { + .name = "{cycles,instructions}:G,{cycles:G,instructions:G},cycles", + .check = test__group5, + }, }; static struct test__event_st test__events_pmu[] = { @@ -658,9 +925,6 @@ static struct test__term test__terms[] = { }, }; -#define TEST__TERMS_CNT (sizeof(test__terms) / \ - sizeof(struct test__term)) - static int test_event(struct test__event_st *e) { struct perf_evlist *evlist; @@ -685,19 +949,19 @@ static int test_event(struct test__event_st *e) static int test_events(struct test__event_st *events, unsigned cnt) { - int ret = 0; + int ret1, ret2 = 0; unsigned i; for (i = 0; i < cnt; i++) { struct test__event_st *e = &events[i]; pr_debug("running test %d '%s'\n", i, e->name); - ret = test_event(e); - if (ret) - break; + ret1 = test_event(e); + if (ret1) + ret2 = ret1; } - return ret; + return ret2; } static int test_term(struct test__term *t) @@ -758,13 +1022,13 @@ static int test_pmu(void) int parse_events__test(void) { - int ret; + int ret1, ret2 = 0; #define TEST_EVENTS(tests) \ do { \ - ret = test_events(tests, ARRAY_SIZE(tests)); \ - if (ret) \ - return ret; \ + ret1 = test_events(tests, ARRAY_SIZE(tests)); \ + if (!ret2) \ + ret2 = ret1; \ } while (0) TEST_EVENTS(test__events); @@ -772,5 +1036,9 @@ 
do { \ if (test_pmu()) TEST_EVENTS(test__events_pmu); - return test_terms(test__terms, ARRAY_SIZE(test__terms)); + ret1 = test_terms(test__terms, ARRAY_SIZE(test__terms)); + if (!ret2) + ret2 = ret1; + + return ret2; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 74a5af4d33ec..b24630398b92 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -551,7 +551,7 @@ static int config_attr(struct perf_event_attr *attr, } int parse_events_add_numeric(struct list_head **list, int *idx, - unsigned long type, unsigned long config, + u32 type, u64 config, struct list_head *head_config) { struct perf_event_attr attr; @@ -611,26 +611,65 @@ int parse_events_add_pmu(struct list_head **list, int *idx, pmu_event_name(head_config)); } +int parse_events__modifier_group(struct list_head *list, + char *event_mod) +{ + return parse_events__modifier_event(list, event_mod, true); +} + +void parse_events__set_leader(char *name, struct list_head *list) +{ + struct perf_evsel *leader; + + __perf_evlist__set_leader(list); + leader = list_entry(list->next, struct perf_evsel, node); + leader->group_name = name ? strdup(name) : NULL; +} + void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all) { /* * Called for single event definition. Update the - * 'all event' list, and reinit the 'signle event' + * 'all event' list, and reinit the 'single event' * list, for next event definition. */ list_splice_tail(list_event, list_all); free(list_event); } -int parse_events_modifier(struct list_head *list, char *str) +struct event_modifier { + int eu; + int ek; + int eh; + int eH; + int eG; + int precise; + int exclude_GH; +}; + +static int get_event_modifier(struct event_modifier *mod, char *str, + struct perf_evsel *evsel) { - struct perf_evsel *evsel; - int exclude = 0, exclude_GH = 0; - int eu = 0, ek = 0, eh = 0, eH = 0, eG = 0, precise = 0; + int eu = evsel ? 
evsel->attr.exclude_user : 0; + int ek = evsel ? evsel->attr.exclude_kernel : 0; + int eh = evsel ? evsel->attr.exclude_hv : 0; + int eH = evsel ? evsel->attr.exclude_host : 0; + int eG = evsel ? evsel->attr.exclude_guest : 0; + int precise = evsel ? evsel->attr.precise_ip : 0; - if (str == NULL) - return 0; + int exclude = eu | ek | eh; + int exclude_GH = evsel ? evsel->exclude_GH : 0; + + /* + * We are here for group and 'GH' was not set as event + * modifier and whatever event/group modifier override + * default 'GH' setup. + */ + if (evsel && !exclude_GH) + eH = eG = 0; + + memset(mod, 0, sizeof(*mod)); while (*str) { if (*str == 'u') { @@ -674,13 +713,51 @@ int parse_events_modifier(struct list_head *list, char *str) if (precise > 3) return -EINVAL; + mod->eu = eu; + mod->ek = ek; + mod->eh = eh; + mod->eH = eH; + mod->eG = eG; + mod->precise = precise; + mod->exclude_GH = exclude_GH; + return 0; +} + +int parse_events__modifier_event(struct list_head *list, char *str, bool add) +{ + struct perf_evsel *evsel; + struct event_modifier mod; + + if (str == NULL) + return 0; + + if (!add && get_event_modifier(&mod, str, NULL)) + return -EINVAL; + list_for_each_entry(evsel, list, node) { - evsel->attr.exclude_user = eu; - evsel->attr.exclude_kernel = ek; - evsel->attr.exclude_hv = eh; - evsel->attr.precise_ip = precise; - evsel->attr.exclude_host = eH; - evsel->attr.exclude_guest = eG; + + if (add && get_event_modifier(&mod, str, evsel)) + return -EINVAL; + + evsel->attr.exclude_user = mod.eu; + evsel->attr.exclude_kernel = mod.ek; + evsel->attr.exclude_hv = mod.eh; + evsel->attr.precise_ip = mod.precise; + evsel->attr.exclude_host = mod.eH; + evsel->attr.exclude_guest = mod.eG; + evsel->exclude_GH = mod.exclude_GH; + } + + return 0; +} + +int parse_events_name(struct list_head *list, char *name) +{ + struct perf_evsel *evsel; + + list_for_each_entry(evsel, list, node) { + if (!evsel->name) + evsel->name = strdup(name); } return 0; @@ -769,7 +846,7 @@ int 
parse_filter(const struct option *opt, const char *str, struct perf_evsel *last = NULL; if (evlist->nr_entries > 0) - last = list_entry(evlist->entries.prev, struct perf_evsel, node); + last = perf_evlist__last(evlist); if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) { fprintf(stderr, @@ -799,7 +876,8 @@ static const char * const event_type_descriptors[] = { * Print the events from <debugfs_mount_point>/tracing/events */ -void print_tracepoint_events(const char *subsys_glob, const char *event_glob) +void print_tracepoint_events(const char *subsys_glob, const char *event_glob, + bool name_only) { DIR *sys_dir, *evt_dir; struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; @@ -829,6 +907,11 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob) !strglobmatch(evt_dirent.d_name, event_glob)) continue; + if (name_only) { + printf("%s:%s ", sys_dirent.d_name, evt_dirent.d_name); + continue; + } + snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); printf(" %-50s [%s]\n", evt_path, @@ -906,7 +989,7 @@ void print_events_type(u8 type) __print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX); } -int print_hwcache_events(const char *event_glob) +int print_hwcache_events(const char *event_glob, bool name_only) { unsigned int type, op, i, printed = 0; char name[64]; @@ -923,8 +1006,11 @@ int print_hwcache_events(const char *event_glob) if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; - printf(" %-50s [%s]\n", name, - event_type_descriptors[PERF_TYPE_HW_CACHE]); + if (name_only) + printf("%s ", name); + else + printf(" %-50s [%s]\n", name, + event_type_descriptors[PERF_TYPE_HW_CACHE]); ++printed; } } @@ -934,7 +1020,8 @@ int print_hwcache_events(const char *event_glob) } static void print_symbol_events(const char *event_glob, unsigned type, - struct event_symbol *syms, unsigned max) + struct event_symbol *syms, unsigned max, + bool name_only) { unsigned i, printed = 0; char 
name[MAX_NAME_LEN]; @@ -946,6 +1033,11 @@ static void print_symbol_events(const char *event_glob, unsigned type, (syms->alias && strglobmatch(syms->alias, event_glob)))) continue; + if (name_only) { + printf("%s ", syms->symbol); + continue; + } + if (strlen(syms->alias)) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else @@ -963,39 +1055,42 @@ static void print_symbol_events(const char *event_glob, unsigned type, /* * Print the help text for the event symbols: */ -void print_events(const char *event_glob) +void print_events(const char *event_glob, bool name_only) { - - printf("\n"); - printf("List of pre-defined events (to be used in -e):\n"); + if (!name_only) { + printf("\n"); + printf("List of pre-defined events (to be used in -e):\n"); + } print_symbol_events(event_glob, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX); + event_symbols_hw, PERF_COUNT_HW_MAX, name_only); print_symbol_events(event_glob, PERF_TYPE_SOFTWARE, - event_symbols_sw, PERF_COUNT_SW_MAX); + event_symbols_sw, PERF_COUNT_SW_MAX, name_only); - print_hwcache_events(event_glob); + print_hwcache_events(event_glob, name_only); if (event_glob != NULL) return; - printf("\n"); - printf(" %-50s [%s]\n", - "rNNN", - event_type_descriptors[PERF_TYPE_RAW]); - printf(" %-50s [%s]\n", - "cpu/t1=v1[,t2=v2,t3 ...]/modifier", - event_type_descriptors[PERF_TYPE_RAW]); - printf(" (see 'perf list --help' on how to encode it)\n"); - printf("\n"); - - printf(" %-50s [%s]\n", - "mem:<addr>[:access]", + if (!name_only) { + printf("\n"); + printf(" %-50s [%s]\n", + "rNNN", + event_type_descriptors[PERF_TYPE_RAW]); + printf(" %-50s [%s]\n", + "cpu/t1=v1[,t2=v2,t3 ...]/modifier", + event_type_descriptors[PERF_TYPE_RAW]); + printf(" (see 'perf list --help' on how to encode it)\n"); + printf("\n"); + + printf(" %-50s [%s]\n", + "mem:<addr>[:access]", event_type_descriptors[PERF_TYPE_BREAKPOINT]); - printf("\n"); + printf("\n"); + } - print_tracepoint_events(NULL, NULL); + 
print_tracepoint_events(NULL, NULL, name_only); } int parse_events__is_hardcoded_term(struct parse_events__term *term) @@ -1005,7 +1100,7 @@ int parse_events__is_hardcoded_term(struct parse_events__term *term) static int new_term(struct parse_events__term **_term, int type_val, int type_term, char *config, - char *str, long num) + char *str, u64 num) { struct parse_events__term *term; @@ -1034,7 +1129,7 @@ static int new_term(struct parse_events__term **_term, int type_val, } int parse_events__term_num(struct parse_events__term **term, - int type_term, char *config, long num) + int type_term, char *config, u64 num) { return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term, config, NULL, num); diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ee9c218a193c..c356e443448d 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -55,7 +55,7 @@ struct parse_events__term { char *config; union { char *str; - long num; + u64 num; } val; int type_val; int type_term; @@ -73,17 +73,19 @@ struct parse_events_data__terms { int parse_events__is_hardcoded_term(struct parse_events__term *term); int parse_events__term_num(struct parse_events__term **_term, - int type_term, char *config, long num); + int type_term, char *config, u64 num); int parse_events__term_str(struct parse_events__term **_term, int type_term, char *config, char *str); int parse_events__term_clone(struct parse_events__term **new, struct parse_events__term *term); void parse_events__free_terms(struct list_head *terms); -int parse_events_modifier(struct list_head *list, char *str); +int parse_events__modifier_event(struct list_head *list, char *str, bool add); +int parse_events__modifier_group(struct list_head *list, char *event_mod); +int parse_events_name(struct list_head *list, char *name); int parse_events_add_tracepoint(struct list_head **list, int *idx, char *sys, char *event); int parse_events_add_numeric(struct list_head **list, int *idx, - 
unsigned long type, unsigned long config, + u32 type, u64 config, struct list_head *head_config); int parse_events_add_cache(struct list_head **list, int *idx, char *type, char *op_result1, char *op_result2); @@ -91,15 +93,17 @@ int parse_events_add_breakpoint(struct list_head **list, int *idx, void *ptr, char *type); int parse_events_add_pmu(struct list_head **list, int *idx, char *pmu , struct list_head *head_config); +void parse_events__set_leader(char *name, struct list_head *list); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); void parse_events_error(void *data, void *scanner, char const *msg); int parse_events__test(void); -void print_events(const char *event_glob); +void print_events(const char *event_glob, bool name_only); void print_events_type(u8 type); -void print_tracepoint_events(const char *subsys_glob, const char *event_glob); -int print_hwcache_events(const char *event_glob); +void print_tracepoint_events(const char *subsys_glob, const char *event_glob, + bool name_only); +int print_hwcache_events(const char *event_glob, bool name_only); extern int is_valid_tracepoint(const char *event_string); extern int valid_debugfs_mount(const char *debugfs); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 384ca74c6b22..f5e28dc68270 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -15,10 +15,10 @@ YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); static int __value(YYSTYPE *yylval, char *str, int base, int token) { - long num; + u64 num; errno = 0; - num = strtoul(str, NULL, base); + num = strtoull(str, NULL, base); if (errno) return PE_ERROR; @@ -70,6 +70,12 @@ static int term(yyscan_t scanner, int type) %} %x mem +%s config +%x event + +group [^,{}/]*[{][^}]*[}][^,{}/]* +event_pmu [^,{}/]+[/][^/]*[/][^,{}/]* +event [^,{}/]+ num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ @@ -84,7 +90,13 @@ modifier_bp [rwx]{1,3} { int start_token; - start_token = (int) 
parse_events_get_extra(yyscanner); + start_token = parse_events_get_extra(yyscanner); + + if (start_token == PE_START_TERMS) + BEGIN(config); + else if (start_token == PE_START_EVENTS) + BEGIN(event); + if (start_token) { parse_events_set_extra(NULL, yyscanner); return start_token; @@ -92,6 +104,26 @@ modifier_bp [rwx]{1,3} } %} +<event>{ + +{group} { + BEGIN(INITIAL); yyless(0); + } + +{event_pmu} | +{event} { + str(yyscanner, PE_EVENT_NAME); + BEGIN(INITIAL); yyless(0); + return PE_EVENT_NAME; + } + +. | +<<EOF>> { + BEGIN(INITIAL); yyless(0); + } + +} + cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); } stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } @@ -127,18 +159,16 @@ speculative-read|speculative-load | refs|Reference|ops|access | misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); } - /* - * These are event config hardcoded term names to be specified - * within xxx/.../ syntax. So far we dont clash with other names, - * so we can put them here directly. In case the we have a conflict - * in future, this needs to go into '//' condition block. 
- */ +<config>{ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } +, { return ','; } +"/" { BEGIN(INITIAL); return '/'; } +} mem: { BEGIN(mem); return PE_PREFIX_MEM; } r{num_raw_hex} { return raw(yyscanner); } @@ -147,10 +177,12 @@ r{num_raw_hex} { return raw(yyscanner); } {modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); } {name} { return str(yyscanner, PE_NAME); } -"/" { return '/'; } +"/" { BEGIN(config); return '/'; } - { return '-'; } -, { return ','; } +, { BEGIN(event); return ','; } : { return ':'; } +"{" { BEGIN(event); return '{'; } +"}" { return '}'; } = { return '='; } \n { } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 2bc5fbff2b5d..42d9a17b83b1 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -27,10 +27,11 @@ do { \ %token PE_START_EVENTS PE_START_TERMS %token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM +%token PE_EVENT_NAME %token PE_NAME %token PE_MODIFIER_EVENT PE_MODIFIER_BP %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT -%token PE_PREFIX_MEM PE_PREFIX_RAW +%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR %type <num> PE_VALUE %type <num> PE_VALUE_SYM_HW @@ -42,6 +43,7 @@ do { \ %type <str> PE_NAME_CACHE_OP_RESULT %type <str> PE_MODIFIER_EVENT %type <str> PE_MODIFIER_BP +%type <str> PE_EVENT_NAME %type <num> value_sym %type <head> event_config %type <term> event_term @@ -53,44 +55,125 @@ do { \ %type <head> event_legacy_numeric %type <head> event_legacy_raw %type <head> event_def +%type <head> event_mod +%type <head> event_name +%type <head> event +%type <head> 
events +%type <head> group_def +%type <head> group +%type <head> groups %union { char *str; - unsigned long num; + u64 num; struct list_head *head; struct parse_events__term *term; } %% start: -PE_START_EVENTS events +PE_START_EVENTS start_events | -PE_START_TERMS terms +PE_START_TERMS start_terms + +start_events: groups +{ + struct parse_events_data__events *data = _data; + + parse_events_update_lists($1, &data->list); +} + +groups: +groups ',' group +{ + struct list_head *list = $1; + struct list_head *group = $3; + + parse_events_update_lists(group, list); + $$ = list; +} +| +groups ',' event +{ + struct list_head *list = $1; + struct list_head *event = $3; + + parse_events_update_lists(event, list); + $$ = list; +} +| +group +| +event + +group: +group_def ':' PE_MODIFIER_EVENT +{ + struct list_head *list = $1; + + ABORT_ON(parse_events__modifier_group(list, $3)); + $$ = list; +} +| +group_def + +group_def: +PE_NAME '{' events '}' +{ + struct list_head *list = $3; + + parse_events__set_leader($1, list); + $$ = list; +} +| +'{' events '}' +{ + struct list_head *list = $2; + + parse_events__set_leader(NULL, list); + $$ = list; +} events: -events ',' event | event +events ',' event +{ + struct list_head *event = $3; + struct list_head *list = $1; -event: -event_def PE_MODIFIER_EVENT + parse_events_update_lists(event, list); + $$ = list; +} +| +event + +event: event_mod + +event_mod: +event_name PE_MODIFIER_EVENT { - struct parse_events_data__events *data = _data; + struct list_head *list = $1; /* * Apply modifier on all events added by single event definition * (there could be more events added for multiple tracepoint * definitions via '*?'. 
*/ - ABORT_ON(parse_events_modifier($1, $2)); - parse_events_update_lists($1, &data->list); + ABORT_ON(parse_events__modifier_event(list, $2, false)); + $$ = list; } | -event_def -{ - struct parse_events_data__events *data = _data; +event_name - parse_events_update_lists($1, &data->list); +event_name: +PE_EVENT_NAME event_def +{ + ABORT_ON(parse_events_name($2, $1)); + free($1); + $$ = $2; } +| +event_def event_def: event_pmu | event_legacy_symbol | @@ -207,7 +290,7 @@ PE_VALUE ':' PE_VALUE struct parse_events_data__events *data = _data; struct list_head *list = NULL; - ABORT_ON(parse_events_add_numeric(&list, &data->idx, $1, $3, NULL)); + ABORT_ON(parse_events_add_numeric(&list, &data->idx, (u32)$1, $3, NULL)); $$ = list; } @@ -222,7 +305,7 @@ PE_RAW $$ = list; } -terms: event_config +start_terms: event_config { struct parse_events_data__terms *data = _data; data->terms = $1; @@ -282,7 +365,7 @@ PE_TERM '=' PE_NAME { struct parse_events__term *term; - ABORT_ON(parse_events__term_str(&term, $1, NULL, $3)); + ABORT_ON(parse_events__term_str(&term, (int)$1, NULL, $3)); $$ = term; } | @@ -290,7 +373,7 @@ PE_TERM '=' PE_VALUE { struct parse_events__term *term; - ABORT_ON(parse_events__term_num(&term, $1, NULL, $3)); + ABORT_ON(parse_events__term_num(&term, (int)$1, NULL, $3)); $$ = term; } | @@ -298,7 +381,7 @@ PE_TERM { struct parse_events__term *term; - ABORT_ON(parse_events__term_num(&term, $1, NULL, 1)); + ABORT_ON(parse_events__term_num(&term, (int)$1, NULL, 1)); $$ = term; } diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h new file mode 100644 index 000000000000..9bd6c4e069c8 --- /dev/null +++ b/tools/perf/util/perf_regs.h @@ -0,0 +1,14 @@ +#ifndef __PERF_REGS_H +#define __PERF_REGS_H + +#ifndef NO_PERF_REGS +#include <perf_regs.h> +#else +#define PERF_REGS_MASK 0 + +static inline const char *perf_reg_name(int id __used) +{ + return NULL; +} +#endif /* NO_PERF_REGS */ +#endif /* __PERF_REGS_H */ diff --git a/tools/perf/util/pmu.c 
b/tools/perf/util/pmu.c index 67715a42cd6d..6631d828db3d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -10,6 +10,8 @@ #include "pmu.h" #include "parse-events.h" +#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" + int perf_pmu_parse(struct list_head *list, char *name); extern FILE *perf_pmu_in; @@ -69,7 +71,7 @@ static int pmu_format(char *name, struct list_head *format) return -1; snprintf(path, PATH_MAX, - "%s/bus/event_source/devices/%s/format", sysfs, name); + "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name); if (stat(path, &st) < 0) return 0; /* no error if format does not exist */ @@ -206,7 +208,7 @@ static int pmu_type(char *name, __u32 *type) return -1; snprintf(path, PATH_MAX, - "%s/bus/event_source/devices/%s/type", sysfs, name); + "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name); if (stat(path, &st) < 0) return -1; @@ -222,6 +224,35 @@ static int pmu_type(char *name, __u32 *type) return ret; } +/* Add all pmus in sysfs to pmu list: */ +static void pmu_read_sysfs(void) +{ + char path[PATH_MAX]; + const char *sysfs; + DIR *dir; + struct dirent *dent; + + sysfs = sysfs_find_mountpoint(); + if (!sysfs) + return; + + snprintf(path, PATH_MAX, + "%s" EVENT_SOURCE_DEVICE_PATH, sysfs); + + dir = opendir(path); + if (!dir) + return; + + while ((dent = readdir(dir))) { + if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) + continue; + /* add to static LIST_HEAD(pmus): */ + perf_pmu__find(dent->d_name); + } + + closedir(dir); +} + static struct perf_pmu *pmu_lookup(char *name) { struct perf_pmu *pmu; @@ -267,6 +298,21 @@ static struct perf_pmu *pmu_find(char *name) return NULL; } +struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu) +{ + /* + * pmu iterator: If pmu is NULL, we start at the begin, + * otherwise return the next pmu. Returns NULL on end. 
+ */ + if (!pmu) { + pmu_read_sysfs(); + pmu = list_prepare_entry(pmu, &pmus, list); + } + list_for_each_entry_continue(pmu, &pmus, list) + return pmu; + return NULL; +} + struct perf_pmu *perf_pmu__find(char *name) { struct perf_pmu *pmu; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 535f2c5258ab..47f68d3cc5d1 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -46,5 +46,7 @@ int perf_pmu__new_format(struct list_head *list, char *name, int config, unsigned long *bits); void perf_pmu__set_format(unsigned long *bits, long from, long to); +struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); + int perf_pmu__test(void); #endif /* __PMU_H */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 2884e67ee625..213362850abd 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,10 +10,12 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c +util/hweight.c util/thread_map.c util/util.c util/xyarray.c util/cgroup.c util/debugfs.c +util/rblist.c util/strlist.c ../../lib/rbtree.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 0688bfb6d280..27187f0b71f0 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -627,7 +627,7 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, * This will group just the fds for this single evsel, to group * multiple events, use evlist.open(). 
*/ - if (perf_evsel__open(evsel, cpus, threads, group, NULL) < 0) { + if (perf_evsel__open(evsel, cpus, threads) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } @@ -824,7 +824,10 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist, if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist, &group)) return NULL; - if (perf_evlist__open(evlist, group) < 0) { + if (group) + perf_evlist__set_leader(evlist); + + if (perf_evlist__open(evlist) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 02dfa19a467f..d28001016fb5 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -237,16 +237,16 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } -static inline -struct event_format *find_cache_event(struct pevent *pevent, int type) +static inline struct event_format *find_cache_event(struct perf_evsel *evsel) { static char ev_name[256]; struct event_format *event; + int type = evsel->attr.config; if (events[type]) return events[type]; - events[type] = event = pevent_find_event(pevent, type); + events[type] = event = evsel->tp_format; if (!event) return NULL; @@ -258,22 +258,21 @@ struct event_format *find_cache_event(struct pevent *pevent, int type) } static void perl_process_tracepoint(union perf_event *perf_event __unused, - struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine __unused, - struct thread *thread) + struct addr_location *al) { struct format_field *field; static char handler[256]; unsigned long long val; unsigned long s, ns; struct event_format *event; - int type; int pid; int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; + struct thread *thread = al->thread; char *comm = 
thread->comm; dSP; @@ -281,13 +280,11 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, if (evsel->attr.type != PERF_TYPE_TRACEPOINT) return; - type = trace_parse_common_type(pevent, data); - - event = find_cache_event(pevent, type); + event = find_cache_event(evsel); if (!event) - die("ug! no event found for type %d", type); + die("ug! no event found for type %d", evsel->attr.config); - pid = trace_parse_common_pid(pevent, data); + pid = raw_field_value(event, "common_pid", data); sprintf(handler, "%s::%s", event->system, event->name); @@ -320,7 +317,7 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, offset = field->offset; XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0))); } else { /* FIELD_IS_NUMERIC */ - val = read_size(pevent, data + field->offset, + val = read_size(event, data + field->offset, field->size); if (field->flags & FIELD_IS_SIGNED) { XPUSHs(sv_2mortal(newSViv(val))); @@ -349,11 +346,11 @@ static void perl_process_tracepoint(union perf_event *perf_event __unused, LEAVE; } -static void perl_process_event_generic(union perf_event *pevent __unused, +static void perl_process_event_generic(union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel __unused, + struct perf_evsel *evsel, struct machine *machine __unused, - struct thread *thread __unused) + struct addr_location *al __unused) { dSP; @@ -363,7 +360,7 @@ static void perl_process_event_generic(union perf_event *pevent __unused, ENTER; SAVETMPS; PUSHMARK(SP); - XPUSHs(sv_2mortal(newSVpvn((const char *)pevent, pevent->header.size))); + XPUSHs(sv_2mortal(newSVpvn((const char *)event, event->header.size))); XPUSHs(sv_2mortal(newSVpvn((const char *)&evsel->attr, sizeof(evsel->attr)))); XPUSHs(sv_2mortal(newSVpvn((const char *)sample, sizeof(*sample)))); XPUSHs(sv_2mortal(newSVpvn((const char *)sample->raw_data, sample->raw_size))); @@ -376,14 +373,13 @@ static void perl_process_event_generic(union perf_event *pevent 
__unused, } static void perl_process_event(union perf_event *event, - struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, - struct thread *thread) + struct addr_location *al) { - perl_process_tracepoint(event, pevent, sample, evsel, machine, thread); - perl_process_event_generic(event, sample, evsel, machine, thread); + perl_process_tracepoint(event, sample, evsel, machine, al); + perl_process_event_generic(event, sample, evsel, machine, al); } static void run_start_sub(void) diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index ce4d1b0c3862..afba09729183 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -27,10 +27,12 @@ #include <errno.h> #include "../../perf.h" +#include "../evsel.h" #include "../util.h" #include "../event.h" #include "../thread.h" #include "../trace-event.h" +#include "../evsel.h" PyMODINIT_FUNC initperf_trace_context(void); @@ -194,16 +196,21 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } -static inline -struct event_format *find_cache_event(struct pevent *pevent, int type) +static inline struct event_format *find_cache_event(struct perf_evsel *evsel) { static char ev_name[256]; struct event_format *event; + int type = evsel->attr.config; + /* + * XXX: Do we really need to cache this since now we have evsel->tp_format + * cached already? 
Need to re-read this "cache" routine that as well calls + * define_event_symbols() :-\ + */ if (events[type]) return events[type]; - events[type] = event = pevent_find_event(pevent, type); + events[type] = event = evsel->tp_format; if (!event) return NULL; @@ -214,12 +221,11 @@ struct event_format *find_cache_event(struct pevent *pevent, int type) return event; } -static void python_process_event(union perf_event *perf_event __unused, - struct pevent *pevent, +static void python_process_tracepoint(union perf_event *perf_event __unused, struct perf_sample *sample, - struct perf_evsel *evsel __unused, + struct perf_evsel *evsel, struct machine *machine __unused, - struct thread *thread) + struct addr_location *al) { PyObject *handler, *retval, *context, *t, *obj, *dict = NULL; static char handler_name[256]; @@ -228,24 +234,22 @@ static void python_process_event(union perf_event *perf_event __unused, unsigned long s, ns; struct event_format *event; unsigned n = 0; - int type; int pid; int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; + struct thread *thread = al->thread; char *comm = thread->comm; t = PyTuple_New(MAX_FIELDS); if (!t) Py_FatalError("couldn't create Python tuple"); - type = trace_parse_common_type(pevent, data); - - event = find_cache_event(pevent, type); + event = find_cache_event(evsel); if (!event) - die("ug! no event found for type %d", type); + die("ug! 
no event found for type %d", (int)evsel->attr.config); - pid = trace_parse_common_pid(pevent, data); + pid = raw_field_value(event, "common_pid", data); sprintf(handler_name, "%s__%s", event->system, event->name); @@ -290,7 +294,7 @@ static void python_process_event(union perf_event *perf_event __unused, offset = field->offset; obj = PyString_FromString((char *)data + offset); } else { /* FIELD_IS_NUMERIC */ - val = read_size(pevent, data + field->offset, + val = read_size(event, data + field->offset, field->size); if (field->flags & FIELD_IS_SIGNED) { if ((long long)val >= LONG_MIN && @@ -335,6 +339,83 @@ static void python_process_event(union perf_event *perf_event __unused, Py_DECREF(t); } +static void python_process_general_event(union perf_event *perf_event __unused, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine __unused, + struct addr_location *al) +{ + PyObject *handler, *retval, *t, *dict; + static char handler_name[64]; + unsigned n = 0; + struct thread *thread = al->thread; + + /* + * Use the MAX_FIELDS to make the function expandable, though + * currently there is only one item for the tuple. 
+ */ + t = PyTuple_New(MAX_FIELDS); + if (!t) + Py_FatalError("couldn't create Python tuple"); + + dict = PyDict_New(); + if (!dict) + Py_FatalError("couldn't create Python dictionary"); + + snprintf(handler_name, sizeof(handler_name), "%s", "process_event"); + + handler = PyDict_GetItemString(main_dict, handler_name); + if (!handler || !PyCallable_Check(handler)) + goto exit; + + PyDict_SetItemString(dict, "ev_name", PyString_FromString(perf_evsel__name(evsel))); + PyDict_SetItemString(dict, "attr", PyString_FromStringAndSize( + (const char *)&evsel->attr, sizeof(evsel->attr))); + PyDict_SetItemString(dict, "sample", PyString_FromStringAndSize( + (const char *)sample, sizeof(*sample))); + PyDict_SetItemString(dict, "raw_buf", PyString_FromStringAndSize( + (const char *)sample->raw_data, sample->raw_size)); + PyDict_SetItemString(dict, "comm", + PyString_FromString(thread->comm)); + if (al->map) { + PyDict_SetItemString(dict, "dso", + PyString_FromString(al->map->dso->name)); + } + if (al->sym) { + PyDict_SetItemString(dict, "symbol", + PyString_FromString(al->sym->name)); + } + + PyTuple_SetItem(t, n++, dict); + if (_PyTuple_Resize(&t, n) == -1) + Py_FatalError("error resizing Python tuple"); + + retval = PyObject_CallObject(handler, t); + if (retval == NULL) + handler_call_die(handler_name); +exit: + Py_DECREF(dict); + Py_DECREF(t); +} + +static void python_process_event(union perf_event *perf_event, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine, + struct addr_location *al) +{ + switch (evsel->attr.type) { + case PERF_TYPE_TRACEPOINT: + python_process_tracepoint(perf_event, sample, evsel, + machine, al); + break; + /* Reserve for future process_hw/sw/raw APIs */ + default: + python_process_general_event(perf_event, sample, evsel, + machine, al); + } +} + static int run_start_sub(void) { PyObject *handler, *retval; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2437fb0b463a..f7bb7ae328da 100644 --- 
a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -15,6 +15,8 @@ #include "util.h" #include "cpumap.h" #include "event-parse.h" +#include "perf_regs.h" +#include "unwind.h" static int perf_session__open(struct perf_session *self, bool force) { @@ -288,10 +290,11 @@ struct branch_info *machine__resolve_bstack(struct machine *self, return bi; } -int machine__resolve_callchain(struct machine *self, - struct thread *thread, - struct ip_callchain *chain, - struct symbol **parent) +static int machine__resolve_callchain_sample(struct machine *machine, + struct thread *thread, + struct ip_callchain *chain, + struct symbol **parent) + { u8 cpumode = PERF_RECORD_MISC_USER; unsigned int i; @@ -316,11 +319,14 @@ int machine__resolve_callchain(struct machine *self, if (ip >= PERF_CONTEXT_MAX) { switch (ip) { case PERF_CONTEXT_HV: - cpumode = PERF_RECORD_MISC_HYPERVISOR; break; + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; case PERF_CONTEXT_KERNEL: - cpumode = PERF_RECORD_MISC_KERNEL; break; + cpumode = PERF_RECORD_MISC_KERNEL; + break; case PERF_CONTEXT_USER: - cpumode = PERF_RECORD_MISC_USER; break; + cpumode = PERF_RECORD_MISC_USER; + break; default: pr_debug("invalid callchain context: " "%"PRId64"\n", (s64) ip); @@ -335,7 +341,7 @@ int machine__resolve_callchain(struct machine *self, } al.filtered = false; - thread__find_addr_location(thread, self, cpumode, + thread__find_addr_location(thread, machine, cpumode, MAP__FUNCTION, ip, &al, NULL); if (al.sym != NULL) { if (sort__has_parent && !*parent && @@ -354,6 +360,40 @@ int machine__resolve_callchain(struct machine *self, return 0; } +static int unwind_entry(struct unwind_entry *entry, void *arg) +{ + struct callchain_cursor *cursor = arg; + return callchain_cursor_append(cursor, entry->ip, + entry->map, entry->sym); +} + +int machine__resolve_callchain(struct machine *machine, + struct perf_evsel *evsel, + struct thread *thread, + struct perf_sample *sample, + struct symbol **parent) + +{ + int ret; + + 
callchain_cursor_reset(&callchain_cursor); + + ret = machine__resolve_callchain_sample(machine, thread, + sample->callchain, parent); + if (ret) + return ret; + + /* Can we do dwarf post unwind? */ + if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) && + (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER))) + return 0; + + return unwind__get_entries(unwind_entry, &callchain_cursor, machine, + thread, evsel->attr.sample_regs_user, + sample); + +} + static int process_event_synth_tracing_data_stub(union perf_event *event __used, struct perf_session *session __used) { @@ -860,6 +900,34 @@ static void branch_stack__printf(struct perf_sample *sample) sample->branch_stack->entries[i].to); } +static void regs_dump__printf(u64 mask, u64 *regs) +{ + unsigned rid, i = 0; + + for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) { + u64 val = regs[i++]; + + printf(".... %-5s 0x%" PRIx64 "\n", + perf_reg_name(rid), val); + } +} + +static void regs_user__printf(struct perf_sample *sample, u64 mask) +{ + struct regs_dump *user_regs = &sample->user_regs; + + if (user_regs->regs) { + printf("... user regs: mask 0x%" PRIx64 "\n", mask); + regs_dump__printf(mask, user_regs->regs); + } +} + +static void stack_user__printf(struct stack_dump *dump) +{ + printf("... 
ustack: size %" PRIu64 ", offset 0x%x\n", + dump->size, dump->offset); +} + static void perf_session__print_tstamp(struct perf_session *session, union perf_event *event, struct perf_sample *sample) @@ -897,7 +965,7 @@ static void dump_event(struct perf_session *session, union perf_event *event, event->header.size, perf_event__name(event->header.type)); } -static void dump_sample(struct perf_session *session, union perf_event *event, +static void dump_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *sample) { u64 sample_type; @@ -909,13 +977,19 @@ static void dump_sample(struct perf_session *session, union perf_event *event, event->header.misc, sample->pid, sample->tid, sample->ip, sample->period, sample->addr); - sample_type = perf_evlist__sample_type(session->evlist); + sample_type = evsel->attr.sample_type; if (sample_type & PERF_SAMPLE_CALLCHAIN) callchain__printf(sample); if (sample_type & PERF_SAMPLE_BRANCH_STACK) branch_stack__printf(sample); + + if (sample_type & PERF_SAMPLE_REGS_USER) + regs_user__printf(sample, evsel->attr.sample_regs_user); + + if (sample_type & PERF_SAMPLE_STACK_USER) + stack_user__printf(&sample->user_stack); } static struct machine * @@ -973,7 +1047,7 @@ static int perf_session_deliver_event(struct perf_session *session, switch (event->header.type) { case PERF_RECORD_SAMPLE: - dump_sample(session, event, sample); + dump_sample(evsel, event, sample); if (evsel == NULL) { ++session->hists.stats.nr_unknown_id; return 0; @@ -1498,9 +1572,9 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, return NULL; } -void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, - struct machine *machine, int print_sym, - int print_dso, int print_symoffset) +void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, + struct perf_sample *sample, struct machine *machine, + int print_sym, int print_dso, int print_symoffset) { struct addr_location al; struct 
callchain_cursor_node *node; @@ -1514,8 +1588,9 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, if (symbol_conf.use_callchain && sample->callchain) { - if (machine__resolve_callchain(machine, al.thread, - sample->callchain, NULL) != 0) { + + if (machine__resolve_callchain(machine, evsel, al.thread, + sample, NULL) != 0) { if (verbose) error("Failed to resolve callchain. Skipping\n"); return; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 1f7ec87db7d7..176a60902f56 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -129,9 +129,9 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type); -void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, - struct machine *machine, int print_sym, - int print_dso, int print_symoffset); +void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event, + struct perf_sample *sample, struct machine *machine, + int print_sym, int print_dso, int print_symoffset); int perf_session__cpu_bitmap(struct perf_session *session, const char *cpu_list, unsigned long *cpu_bitmap); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c new file mode 100644 index 000000000000..db0cc92cf2ea --- /dev/null +++ b/tools/perf/util/symbol-elf.c @@ -0,0 +1,841 @@ +#include <libelf.h> +#include <gelf.h> +#include <elf.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <unistd.h> +#include <inttypes.h> + +#include "symbol.h" +#include "debug.h" + +#ifndef NT_GNU_BUILD_ID +#define NT_GNU_BUILD_ID 3 +#endif + +/** + * elf_symtab__for_each_symbol - iterate thru all the symbols + * + * @syms: struct elf_symtab instance to iterate + * @idx: uint32_t idx + * @sym: GElf_Sym iterator + */ +#define elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) \ + for (idx = 0, 
gelf_getsym(syms, idx, &sym);\ + idx < nr_syms; \ + idx++, gelf_getsym(syms, idx, &sym)) + +static inline uint8_t elf_sym__type(const GElf_Sym *sym) +{ + return GELF_ST_TYPE(sym->st_info); +} + +static inline int elf_sym__is_function(const GElf_Sym *sym) +{ + return elf_sym__type(sym) == STT_FUNC && + sym->st_name != 0 && + sym->st_shndx != SHN_UNDEF; +} + +static inline bool elf_sym__is_object(const GElf_Sym *sym) +{ + return elf_sym__type(sym) == STT_OBJECT && + sym->st_name != 0 && + sym->st_shndx != SHN_UNDEF; +} + +static inline int elf_sym__is_label(const GElf_Sym *sym) +{ + return elf_sym__type(sym) == STT_NOTYPE && + sym->st_name != 0 && + sym->st_shndx != SHN_UNDEF && + sym->st_shndx != SHN_ABS; +} + +static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type) +{ + switch (type) { + case MAP__FUNCTION: + return elf_sym__is_function(sym); + case MAP__VARIABLE: + return elf_sym__is_object(sym); + default: + return false; + } +} + +static inline const char *elf_sym__name(const GElf_Sym *sym, + const Elf_Data *symstrs) +{ + return symstrs->d_buf + sym->st_name; +} + +static inline const char *elf_sec__name(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return secstrs->d_buf + shdr->sh_name; +} + +static inline int elf_sec__is_text(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return strstr(elf_sec__name(shdr, secstrs), "text") != NULL; +} + +static inline bool elf_sec__is_data(const GElf_Shdr *shdr, + const Elf_Data *secstrs) +{ + return strstr(elf_sec__name(shdr, secstrs), "data") != NULL; +} + +static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs, + enum map_type type) +{ + switch (type) { + case MAP__FUNCTION: + return elf_sec__is_text(shdr, secstrs); + case MAP__VARIABLE: + return elf_sec__is_data(shdr, secstrs); + default: + return false; + } +} + +static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) +{ + Elf_Scn *sec = NULL; + GElf_Shdr shdr; + size_t cnt = 1; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + 
gelf_getshdr(sec, &shdr); + + if ((addr >= shdr.sh_addr) && + (addr < (shdr.sh_addr + shdr.sh_size))) + return cnt; + + ++cnt; + } + + return -1; +} + +static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + GElf_Shdr *shp, const char *name, + size_t *idx) +{ + Elf_Scn *sec = NULL; + size_t cnt = 1; + + /* Elf is corrupted/truncated, avoid calling elf_strptr. */ + if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) + return NULL; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + char *str; + + gelf_getshdr(sec, shp); + str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); + if (!strcmp(name, str)) { + if (idx) + *idx = cnt; + break; + } + ++cnt; + } + + return sec; +} + +#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ + for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \ + idx < nr_entries; \ + ++idx, pos = gelf_getrel(reldata, idx, &pos_mem)) + +#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \ + for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \ + idx < nr_entries; \ + ++idx, pos = gelf_getrela(reldata, idx, &pos_mem)) + +/* + * We need to check if we have a .dynsym, so that we can handle the + * .plt, synthesizing its symbols, that aren't on the symtabs (be it + * .dynsym or .symtab). + * And always look at the original dso, not at debuginfo packages, that + * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). 
+ */ +int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map, + symbol_filter_t filter) +{ + uint32_t nr_rel_entries, idx; + GElf_Sym sym; + u64 plt_offset; + GElf_Shdr shdr_plt; + struct symbol *f; + GElf_Shdr shdr_rel_plt, shdr_dynsym; + Elf_Data *reldata, *syms, *symstrs; + Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym; + size_t dynsym_idx; + GElf_Ehdr ehdr; + char sympltname[1024]; + Elf *elf; + int nr = 0, symidx, err = 0; + + if (!ss->dynsym) + return 0; + + elf = ss->elf; + ehdr = ss->ehdr; + + scn_dynsym = ss->dynsym; + shdr_dynsym = ss->dynshdr; + dynsym_idx = ss->dynsym_idx; + + if (scn_dynsym == NULL) + goto out_elf_end; + + scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, + ".rela.plt", NULL); + if (scn_plt_rel == NULL) { + scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, + ".rel.plt", NULL); + if (scn_plt_rel == NULL) + goto out_elf_end; + } + + err = -1; + + if (shdr_rel_plt.sh_link != dynsym_idx) + goto out_elf_end; + + if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL) + goto out_elf_end; + + /* + * Fetch the relocation section to find the idxes to the GOT + * and the symbols in the .dynsym they refer to. 
+ */ + reldata = elf_getdata(scn_plt_rel, NULL); + if (reldata == NULL) + goto out_elf_end; + + syms = elf_getdata(scn_dynsym, NULL); + if (syms == NULL) + goto out_elf_end; + + scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link); + if (scn_symstrs == NULL) + goto out_elf_end; + + symstrs = elf_getdata(scn_symstrs, NULL); + if (symstrs == NULL) + goto out_elf_end; + + if (symstrs->d_size == 0) + goto out_elf_end; + + nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; + plt_offset = shdr_plt.sh_offset; + + if (shdr_rel_plt.sh_type == SHT_RELA) { + GElf_Rela pos_mem, *pos; + + elf_section__for_each_rela(reldata, pos, pos_mem, idx, + nr_rel_entries) { + symidx = GELF_R_SYM(pos->r_info); + plt_offset += shdr_plt.sh_entsize; + gelf_getsym(syms, symidx, &sym); + snprintf(sympltname, sizeof(sympltname), + "%s@plt", elf_sym__name(&sym, symstrs)); + + f = symbol__new(plt_offset, shdr_plt.sh_entsize, + STB_GLOBAL, sympltname); + if (!f) + goto out_elf_end; + + if (filter && filter(map, f)) + symbol__delete(f); + else { + symbols__insert(&dso->symbols[map->type], f); + ++nr; + } + } + } else if (shdr_rel_plt.sh_type == SHT_REL) { + GElf_Rel pos_mem, *pos; + elf_section__for_each_rel(reldata, pos, pos_mem, idx, + nr_rel_entries) { + symidx = GELF_R_SYM(pos->r_info); + plt_offset += shdr_plt.sh_entsize; + gelf_getsym(syms, symidx, &sym); + snprintf(sympltname, sizeof(sympltname), + "%s@plt", elf_sym__name(&sym, symstrs)); + + f = symbol__new(plt_offset, shdr_plt.sh_entsize, + STB_GLOBAL, sympltname); + if (!f) + goto out_elf_end; + + if (filter && filter(map, f)) + symbol__delete(f); + else { + symbols__insert(&dso->symbols[map->type], f); + ++nr; + } + } + } + + err = 0; +out_elf_end: + if (err == 0) + return nr; + pr_debug("%s: problems reading %s PLT info.\n", + __func__, dso->long_name); + return 0; +} + +/* + * Align offset to 4 bytes as needed for note name and descriptor data. 
+ */ +#define NOTE_ALIGN(n) (((n) + 3) & -4U) + +static int elf_read_build_id(Elf *elf, void *bf, size_t size) +{ + int err = -1; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *data; + Elf_Scn *sec; + Elf_Kind ek; + void *ptr; + + if (size < BUILD_ID_SIZE) + goto out; + + ek = elf_kind(elf); + if (ek != ELF_K_ELF) + goto out; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_err("%s: cannot get elf header.\n", __func__); + goto out; + } + + /* + * Check following sections for notes: + * '.note.gnu.build-id' + * '.notes' + * '.note' (VDSO specific) + */ + do { + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".note.gnu.build-id", NULL); + if (sec) + break; + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".notes", NULL); + if (sec) + break; + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".note", NULL); + if (sec) + break; + + return err; + + } while (0); + + data = elf_getdata(sec, NULL); + if (data == NULL) + goto out; + + ptr = data->d_buf; + while (ptr < (data->d_buf + data->d_size)) { + GElf_Nhdr *nhdr = ptr; + size_t namesz = NOTE_ALIGN(nhdr->n_namesz), + descsz = NOTE_ALIGN(nhdr->n_descsz); + const char *name; + + ptr += sizeof(*nhdr); + name = ptr; + ptr += namesz; + if (nhdr->n_type == NT_GNU_BUILD_ID && + nhdr->n_namesz == sizeof("GNU")) { + if (memcmp(name, "GNU", sizeof("GNU")) == 0) { + size_t sz = min(size, descsz); + memcpy(bf, ptr, sz); + memset(bf + sz, 0, size - sz); + err = descsz; + break; + } + } + ptr += descsz; + } + +out: + return err; +} + +int filename__read_build_id(const char *filename, void *bf, size_t size) +{ + int fd, err = -1; + Elf *elf; + + if (size < BUILD_ID_SIZE) + goto out; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); + goto out_close; + } + + err = elf_read_build_id(elf, bf, size); + + elf_end(elf); +out_close: + close(fd); +out: + return err; +} + +int 
sysfs__read_build_id(const char *filename, void *build_id, size_t size) +{ + int fd, err = -1; + + if (size < BUILD_ID_SIZE) + goto out; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + while (1) { + char bf[BUFSIZ]; + GElf_Nhdr nhdr; + size_t namesz, descsz; + + if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) + break; + + namesz = NOTE_ALIGN(nhdr.n_namesz); + descsz = NOTE_ALIGN(nhdr.n_descsz); + if (nhdr.n_type == NT_GNU_BUILD_ID && + nhdr.n_namesz == sizeof("GNU")) { + if (read(fd, bf, namesz) != (ssize_t)namesz) + break; + if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { + size_t sz = min(descsz, size); + if (read(fd, build_id, sz) == (ssize_t)sz) { + memset(build_id + sz, 0, size - sz); + err = 0; + break; + } + } else if (read(fd, bf, descsz) != (ssize_t)descsz) + break; + } else { + int n = namesz + descsz; + if (read(fd, bf, n) != n) + break; + } + } + close(fd); +out: + return err; +} + +int filename__read_debuglink(const char *filename, char *debuglink, + size_t size) +{ + int fd, err = -1; + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *data; + Elf_Scn *sec; + Elf_Kind ek; + + fd = open(filename, O_RDONLY); + if (fd < 0) + goto out; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); + goto out_close; + } + + ek = elf_kind(elf); + if (ek != ELF_K_ELF) + goto out_close; + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_err("%s: cannot get elf header.\n", __func__); + goto out_close; + } + + sec = elf_section_by_name(elf, &ehdr, &shdr, + ".gnu_debuglink", NULL); + if (sec == NULL) + goto out_close; + + data = elf_getdata(sec, NULL); + if (data == NULL) + goto out_close; + + /* the start of this section is a zero-terminated string */ + strncpy(debuglink, data->d_buf, size); + + elf_end(elf); + +out_close: + close(fd); +out: + return err; +} + +static int dso__swap_init(struct dso *dso, unsigned char eidata) +{ + static unsigned int const endian 
= 1; + + dso->needs_swap = DSO_SWAP__NO; + + switch (eidata) { + case ELFDATA2LSB: + /* We are big endian, DSO is little endian. */ + if (*(unsigned char const *)&endian != 1) + dso->needs_swap = DSO_SWAP__YES; + break; + + case ELFDATA2MSB: + /* We are little endian, DSO is big endian. */ + if (*(unsigned char const *)&endian != 0) + dso->needs_swap = DSO_SWAP__YES; + break; + + default: + pr_err("unrecognized DSO data encoding %d\n", eidata); + return -EINVAL; + } + + return 0; +} + +bool symsrc__possibly_runtime(struct symsrc *ss) +{ + return ss->dynsym || ss->opdsec; +} + +bool symsrc__has_symtab(struct symsrc *ss) +{ + return ss->symtab != NULL; +} + +void symsrc__destroy(struct symsrc *ss) +{ + free(ss->name); + elf_end(ss->elf); + close(ss->fd); +} + +int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, + enum dso_binary_type type) +{ + int err = -1; + GElf_Ehdr ehdr; + Elf *elf; + int fd; + + fd = open(name, O_RDONLY); + if (fd < 0) + return -1; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) { + pr_debug("%s: cannot read %s ELF file.\n", __func__, name); + goto out_close; + } + + if (gelf_getehdr(elf, &ehdr) == NULL) { + pr_debug("%s: cannot get elf header.\n", __func__); + goto out_elf_end; + } + + if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) + goto out_elf_end; + + /* Always reject images with a mismatched build-id: */ + if (dso->has_build_id) { + u8 build_id[BUILD_ID_SIZE]; + + if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) + goto out_elf_end; + + if (!dso__build_id_equal(dso, build_id)) + goto out_elf_end; + } + + ss->symtab = elf_section_by_name(elf, &ehdr, &ss->symshdr, ".symtab", + NULL); + if (ss->symshdr.sh_type != SHT_SYMTAB) + ss->symtab = NULL; + + ss->dynsym_idx = 0; + ss->dynsym = elf_section_by_name(elf, &ehdr, &ss->dynshdr, ".dynsym", + &ss->dynsym_idx); + if (ss->dynshdr.sh_type != SHT_DYNSYM) + ss->dynsym = NULL; + + ss->opdidx = 0; + ss->opdsec = elf_section_by_name(elf, &ehdr, 
&ss->opdshdr, ".opd", + &ss->opdidx); + if (ss->opdshdr.sh_type != SHT_PROGBITS) + ss->opdsec = NULL; + + if (dso->kernel == DSO_TYPE_USER) { + GElf_Shdr shdr; + ss->adjust_symbols = (ehdr.e_type == ET_EXEC || + elf_section_by_name(elf, &ehdr, &shdr, + ".gnu.prelink_undo", + NULL) != NULL); + } else { + ss->adjust_symbols = 0; + } + + ss->name = strdup(name); + if (!ss->name) + goto out_elf_end; + + ss->elf = elf; + ss->fd = fd; + ss->ehdr = ehdr; + ss->type = type; + + return 0; + +out_elf_end: + elf_end(elf); +out_close: + close(fd); + return err; +} + +int dso__load_sym(struct dso *dso, struct map *map, + struct symsrc *syms_ss, struct symsrc *runtime_ss, + symbol_filter_t filter, int kmodule) +{ + struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; + struct map *curr_map = map; + struct dso *curr_dso = dso; + Elf_Data *symstrs, *secstrs; + uint32_t nr_syms; + int err = -1; + uint32_t idx; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + Elf_Data *syms, *opddata = NULL; + GElf_Sym sym; + Elf_Scn *sec, *sec_strndx; + Elf *elf; + int nr = 0; + + dso->symtab_type = syms_ss->type; + + if (!syms_ss->symtab) { + syms_ss->symtab = syms_ss->dynsym; + syms_ss->symshdr = syms_ss->dynshdr; + } + + elf = syms_ss->elf; + ehdr = syms_ss->ehdr; + sec = syms_ss->symtab; + shdr = syms_ss->symshdr; + + if (runtime_ss->opdsec) + opddata = elf_rawdata(runtime_ss->opdsec, NULL); + + syms = elf_getdata(sec, NULL); + if (syms == NULL) + goto out_elf_end; + + sec = elf_getscn(elf, shdr.sh_link); + if (sec == NULL) + goto out_elf_end; + + symstrs = elf_getdata(sec, NULL); + if (symstrs == NULL) + goto out_elf_end; + + sec_strndx = elf_getscn(elf, ehdr.e_shstrndx); + if (sec_strndx == NULL) + goto out_elf_end; + + secstrs = elf_getdata(sec_strndx, NULL); + if (secstrs == NULL) + goto out_elf_end; + + nr_syms = shdr.sh_size / shdr.sh_entsize; + + memset(&sym, 0, sizeof(sym)); + dso->adjust_symbols = runtime_ss->adjust_symbols; + elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { + struct 
symbol *f; + const char *elf_name = elf_sym__name(&sym, symstrs); + char *demangled = NULL; + int is_label = elf_sym__is_label(&sym); + const char *section_name; + bool used_opd = false; + + if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name && + strcmp(elf_name, kmap->ref_reloc_sym->name) == 0) + kmap->ref_reloc_sym->unrelocated_addr = sym.st_value; + + if (!is_label && !elf_sym__is_a(&sym, map->type)) + continue; + + /* Reject ARM ELF "mapping symbols": these aren't unique and + * don't identify functions, so will confuse the profile + * output: */ + if (ehdr.e_machine == EM_ARM) { + if (!strcmp(elf_name, "$a") || + !strcmp(elf_name, "$d") || + !strcmp(elf_name, "$t")) + continue; + } + + if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) { + u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr; + u64 *opd = opddata->d_buf + offset; + sym.st_value = DSO__SWAP(dso, u64, *opd); + sym.st_shndx = elf_addr_to_index(runtime_ss->elf, + sym.st_value); + used_opd = true; + } + + sec = elf_getscn(runtime_ss->elf, sym.st_shndx); + if (!sec) + goto out_elf_end; + + gelf_getshdr(sec, &shdr); + + if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type)) + continue; + + section_name = elf_sec__name(&shdr, secstrs); + + /* On ARM, symbols for thumb functions have 1 added to + * the symbol address as a flag - remove it */ + if ((ehdr.e_machine == EM_ARM) && + (map->type == MAP__FUNCTION) && + (sym.st_value & 1)) + --sym.st_value; + + if (dso->kernel != DSO_TYPE_USER || kmodule) { + char dso_name[PATH_MAX]; + + if (strcmp(section_name, + (curr_dso->short_name + + dso->short_name_len)) == 0) + goto new_symbol; + + if (strcmp(section_name, ".text") == 0) { + curr_map = map; + curr_dso = dso; + goto new_symbol; + } + + snprintf(dso_name, sizeof(dso_name), + "%s%s", dso->short_name, section_name); + + curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name); + if (curr_map == NULL) { + u64 start = sym.st_value; + + if (kmodule) + start += map->start 
+ shdr.sh_offset; + + curr_dso = dso__new(dso_name); + if (curr_dso == NULL) + goto out_elf_end; + curr_dso->kernel = dso->kernel; + curr_dso->long_name = dso->long_name; + curr_dso->long_name_len = dso->long_name_len; + curr_map = map__new2(start, curr_dso, + map->type); + if (curr_map == NULL) { + dso__delete(curr_dso); + goto out_elf_end; + } + curr_map->map_ip = identity__map_ip; + curr_map->unmap_ip = identity__map_ip; + curr_dso->symtab_type = dso->symtab_type; + map_groups__insert(kmap->kmaps, curr_map); + dsos__add(&dso->node, curr_dso); + dso__set_loaded(curr_dso, map->type); + } else + curr_dso = curr_map->dso; + + goto new_symbol; + } + + if ((used_opd && runtime_ss->adjust_symbols) + || (!used_opd && syms_ss->adjust_symbols)) { + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " + "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, + (u64)sym.st_value, (u64)shdr.sh_addr, + (u64)shdr.sh_offset); + sym.st_value -= shdr.sh_addr - shdr.sh_offset; + } + /* + * We need to figure out if the object was created from C++ sources + * DWARF DW_compile_unit has this, but we don't always have access + * to it... + */ + demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI); + if (demangled != NULL) + elf_name = demangled; +new_symbol: + f = symbol__new(sym.st_value, sym.st_size, + GELF_ST_BIND(sym.st_info), elf_name); + free(demangled); + if (!f) + goto out_elf_end; + + if (filter && filter(curr_map, f)) + symbol__delete(f); + else { + symbols__insert(&curr_dso->symbols[curr_map->type], f); + nr++; + } + } + + /* + * For misannotated, zeroed, ASM function sizes. + */ + if (nr > 0) { + symbols__fixup_duplicate(&dso->symbols[map->type]); + symbols__fixup_end(&dso->symbols[map->type]); + if (kmap) { + /* + * We need to fixup this here too because we create new + * maps here, for things like vsyscall sections. 
+ */ + __map_groups__fixup_end(kmap->kmaps, map->type); + } + } + err = nr; +out_elf_end: + return err; +} + +void symbol__elf_init(void) +{ + elf_version(EV_CURRENT); +} diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c new file mode 100644 index 000000000000..6738ea128c90 --- /dev/null +++ b/tools/perf/util/symbol-minimal.c @@ -0,0 +1,303 @@ +#include "symbol.h" + +#include <elf.h> +#include <stdio.h> +#include <fcntl.h> +#include <string.h> +#include <byteswap.h> +#include <sys/stat.h> + + +static bool check_need_swap(int file_endian) +{ + const int data = 1; + u8 *check = (u8 *)&data; + int host_endian; + + if (check[0] == 1) + host_endian = ELFDATA2LSB; + else + host_endian = ELFDATA2MSB; + + return host_endian != file_endian; +} + +#define NOTE_ALIGN(sz) (((sz) + 3) & ~3) + +#define NT_GNU_BUILD_ID 3 + +static int read_build_id(void *note_data, size_t note_len, void *bf, + size_t size, bool need_swap) +{ + struct { + u32 n_namesz; + u32 n_descsz; + u32 n_type; + } *nhdr; + void *ptr; + + ptr = note_data; + while (ptr < (note_data + note_len)) { + const char *name; + size_t namesz, descsz; + + nhdr = ptr; + if (need_swap) { + nhdr->n_namesz = bswap_32(nhdr->n_namesz); + nhdr->n_descsz = bswap_32(nhdr->n_descsz); + nhdr->n_type = bswap_32(nhdr->n_type); + } + + namesz = NOTE_ALIGN(nhdr->n_namesz); + descsz = NOTE_ALIGN(nhdr->n_descsz); + + ptr += sizeof(*nhdr); + name = ptr; + ptr += namesz; + if (nhdr->n_type == NT_GNU_BUILD_ID && + nhdr->n_namesz == sizeof("GNU")) { + if (memcmp(name, "GNU", sizeof("GNU")) == 0) { + size_t sz = min(size, descsz); + memcpy(bf, ptr, sz); + memset(bf + sz, 0, size - sz); + return 0; + } + } + ptr += descsz; + } + + return -1; +} + +int filename__read_debuglink(const char *filename __used, + char *debuglink __used, size_t size __used) +{ + return -1; +} + +/* + * Just try PT_NOTE header otherwise fails + */ +int filename__read_build_id(const char *filename, void *bf, size_t size) +{ + FILE *fp; + 
int ret = -1; + bool need_swap = false; + u8 e_ident[EI_NIDENT]; + size_t buf_size; + void *buf; + int i; + + fp = fopen(filename, "r"); + if (fp == NULL) + return -1; + + if (fread(e_ident, sizeof(e_ident), 1, fp) != 1) + goto out; + + if (memcmp(e_ident, ELFMAG, SELFMAG) || + e_ident[EI_VERSION] != EV_CURRENT) + goto out; + + need_swap = check_need_swap(e_ident[EI_DATA]); + + /* for simplicity */ + fseek(fp, 0, SEEK_SET); + + if (e_ident[EI_CLASS] == ELFCLASS32) { + Elf32_Ehdr ehdr; + Elf32_Phdr *phdr; + + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + goto out; + + if (need_swap) { + ehdr.e_phoff = bswap_32(ehdr.e_phoff); + ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); + ehdr.e_phnum = bswap_16(ehdr.e_phnum); + } + + buf_size = ehdr.e_phentsize * ehdr.e_phnum; + buf = malloc(buf_size); + if (buf == NULL) + goto out; + + fseek(fp, ehdr.e_phoff, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + void *tmp; + + if (need_swap) { + phdr->p_type = bswap_32(phdr->p_type); + phdr->p_offset = bswap_32(phdr->p_offset); + phdr->p_filesz = bswap_32(phdr->p_filesz); + } + + if (phdr->p_type != PT_NOTE) + continue; + + buf_size = phdr->p_filesz; + tmp = realloc(buf, buf_size); + if (tmp == NULL) + goto out_free; + + buf = tmp; + fseek(fp, phdr->p_offset, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + ret = read_build_id(buf, buf_size, bf, size, need_swap); + if (ret == 0) + ret = size; + break; + } + } else { + Elf64_Ehdr ehdr; + Elf64_Phdr *phdr; + + if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) + goto out; + + if (need_swap) { + ehdr.e_phoff = bswap_64(ehdr.e_phoff); + ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); + ehdr.e_phnum = bswap_16(ehdr.e_phnum); + } + + buf_size = ehdr.e_phentsize * ehdr.e_phnum; + buf = malloc(buf_size); + if (buf == NULL) + goto out; + + fseek(fp, ehdr.e_phoff, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + for (i = 0, 
phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + void *tmp; + + if (need_swap) { + phdr->p_type = bswap_32(phdr->p_type); + phdr->p_offset = bswap_64(phdr->p_offset); + phdr->p_filesz = bswap_64(phdr->p_filesz); + } + + if (phdr->p_type != PT_NOTE) + continue; + + buf_size = phdr->p_filesz; + tmp = realloc(buf, buf_size); + if (tmp == NULL) + goto out_free; + + buf = tmp; + fseek(fp, phdr->p_offset, SEEK_SET); + if (fread(buf, buf_size, 1, fp) != 1) + goto out_free; + + ret = read_build_id(buf, buf_size, bf, size, need_swap); + if (ret == 0) + ret = size; + break; + } + } +out_free: + free(buf); +out: + fclose(fp); + return ret; +} + +int sysfs__read_build_id(const char *filename, void *build_id, size_t size) +{ + int fd; + int ret = -1; + struct stat stbuf; + size_t buf_size; + void *buf; + + fd = open(filename, O_RDONLY); + if (fd < 0) + return -1; + + if (fstat(fd, &stbuf) < 0) + goto out; + + buf_size = stbuf.st_size; + buf = malloc(buf_size); + if (buf == NULL) + goto out; + + if (read(fd, buf, buf_size) != (ssize_t) buf_size) + goto out_free; + + ret = read_build_id(buf, buf_size, build_id, size, false); +out_free: + free(buf); +out: + close(fd); + return ret; +} + +int symsrc__init(struct symsrc *ss, struct dso *dso __used, const char *name, + enum dso_binary_type type) +{ + int fd = open(name, O_RDONLY); + if (fd < 0) + return -1; + + ss->name = strdup(name); + if (!ss->name) + goto out_close; + + ss->type = type; + + return 0; +out_close: + close(fd); + return -1; +} + +bool symsrc__possibly_runtime(struct symsrc *ss __used) +{ + /* Assume all sym sources could be a runtime image. 
*/ + return true; +} + +bool symsrc__has_symtab(struct symsrc *ss __used) +{ + return false; +} + +void symsrc__destroy(struct symsrc *ss) +{ + free(ss->name); + close(ss->fd); +} + +int dso__synthesize_plt_symbols(struct dso *dso __used, + struct symsrc *ss __used, + struct map *map __used, + symbol_filter_t filter __used) +{ + return 0; +} + +int dso__load_sym(struct dso *dso, struct map *map __used, struct symsrc *ss, + struct symsrc *runtime_ss __used, + symbol_filter_t filter __used, int kmodule __used) +{ + unsigned char *build_id[BUILD_ID_SIZE]; + + if (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0) { + dso__set_build_id(dso, build_id); + return 1; + } + return 0; +} + +void symbol__elf_init(void) +{ +} diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 8b63b678e127..753699a20bc8 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -15,8 +15,6 @@ #include "symbol.h" #include "strlist.h" -#include <libelf.h> -#include <gelf.h> #include <elf.h> #include <limits.h> #include <sys/utsname.h> @@ -25,15 +23,7 @@ #define KSYM_NAME_LEN 256 #endif -#ifndef NT_GNU_BUILD_ID -#define NT_GNU_BUILD_ID 3 -#endif - static void dso_cache__free(struct rb_root *root); -static bool dso__build_id_equal(const struct dso *dso, u8 *build_id); -static int elf_read_build_id(Elf *elf, void *bf, size_t size); -static void dsos__add(struct list_head *head, struct dso *dso); -static struct map *map__new2(u64 start, struct dso *dso, enum map_type type); static int dso__load_kernel_sym(struct dso *dso, struct map *map, symbol_filter_t filter); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map, @@ -170,7 +160,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb) return SYMBOL_B; } -static void symbols__fixup_duplicate(struct rb_root *symbols) +void symbols__fixup_duplicate(struct rb_root *symbols) { struct rb_node *nd; struct symbol *curr, *next; @@ -199,7 +189,7 @@ again: } } -static void 
symbols__fixup_end(struct rb_root *symbols) +void symbols__fixup_end(struct rb_root *symbols) { struct rb_node *nd, *prevnd = rb_first(symbols); struct symbol *curr, *prev; @@ -222,7 +212,7 @@ static void symbols__fixup_end(struct rb_root *symbols) curr->end = roundup(curr->start, 4096); } -static void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) +void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) { struct map *prev, *curr; struct rb_node *nd, *prevnd = rb_first(&mg->maps[type]); @@ -252,8 +242,7 @@ static void map_groups__fixup_end(struct map_groups *mg) __map_groups__fixup_end(mg, i); } -static struct symbol *symbol__new(u64 start, u64 len, u8 binding, - const char *name) +struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) { size_t namelen = strlen(name) + 1; struct symbol *sym = calloc(1, (symbol_conf.priv_size + @@ -390,7 +379,7 @@ void dso__set_build_id(struct dso *dso, void *build_id) dso->has_build_id = 1; } -static void symbols__insert(struct rb_root *symbols, struct symbol *sym) +void symbols__insert(struct rb_root *symbols, struct symbol *sym) { struct rb_node **p = &symbols->rb_node; struct rb_node *parent = NULL; @@ -574,7 +563,7 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp) int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, - char type, u64 start, u64 end)) + char type, u64 start)) { char *line = NULL; size_t n; @@ -614,13 +603,8 @@ int kallsyms__parse(const char *filename, void *arg, break; } - /* - * module symbols are not sorted so we add all - * symbols with zero length and rely on - * symbols__fixup_end() to fix it up. 
- */ err = process_symbol(arg, symbol_name, - symbol_type, start, start); + symbol_type, start); if (err) break; } @@ -647,7 +631,7 @@ static u8 kallsyms2elf_type(char type) } static int map__process_kallsym_symbol(void *arg, const char *name, - char type, u64 start, u64 end) + char type, u64 start) { struct symbol *sym; struct process_kallsyms_args *a = arg; @@ -656,8 +640,12 @@ static int map__process_kallsym_symbol(void *arg, const char *name, if (!symbol_type__is_a(type, a->map->type)) return 0; - sym = symbol__new(start, end - start + 1, - kallsyms2elf_type(type), name); + /* + * module symbols are not sorted so we add all + * symbols, setting length to 0, and rely on + * symbols__fixup_end() to fix it up. + */ + sym = symbol__new(start, 0, kallsyms2elf_type(type), name); if (sym == NULL) return -ENOMEM; /* @@ -904,556 +892,7 @@ out_failure: return -1; } -/** - * elf_symtab__for_each_symbol - iterate thru all the symbols - * - * @syms: struct elf_symtab instance to iterate - * @idx: uint32_t idx - * @sym: GElf_Sym iterator - */ -#define elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) \ - for (idx = 0, gelf_getsym(syms, idx, &sym);\ - idx < nr_syms; \ - idx++, gelf_getsym(syms, idx, &sym)) - -static inline uint8_t elf_sym__type(const GElf_Sym *sym) -{ - return GELF_ST_TYPE(sym->st_info); -} - -static inline int elf_sym__is_function(const GElf_Sym *sym) -{ - return elf_sym__type(sym) == STT_FUNC && - sym->st_name != 0 && - sym->st_shndx != SHN_UNDEF; -} - -static inline bool elf_sym__is_object(const GElf_Sym *sym) -{ - return elf_sym__type(sym) == STT_OBJECT && - sym->st_name != 0 && - sym->st_shndx != SHN_UNDEF; -} - -static inline int elf_sym__is_label(const GElf_Sym *sym) -{ - return elf_sym__type(sym) == STT_NOTYPE && - sym->st_name != 0 && - sym->st_shndx != SHN_UNDEF && - sym->st_shndx != SHN_ABS; -} - -static inline const char *elf_sec__name(const GElf_Shdr *shdr, - const Elf_Data *secstrs) -{ - return secstrs->d_buf + shdr->sh_name; -} - -static 
inline int elf_sec__is_text(const GElf_Shdr *shdr, - const Elf_Data *secstrs) -{ - return strstr(elf_sec__name(shdr, secstrs), "text") != NULL; -} - -static inline bool elf_sec__is_data(const GElf_Shdr *shdr, - const Elf_Data *secstrs) -{ - return strstr(elf_sec__name(shdr, secstrs), "data") != NULL; -} - -static inline const char *elf_sym__name(const GElf_Sym *sym, - const Elf_Data *symstrs) -{ - return symstrs->d_buf + sym->st_name; -} - -static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, - GElf_Shdr *shp, const char *name, - size_t *idx) -{ - Elf_Scn *sec = NULL; - size_t cnt = 1; - - while ((sec = elf_nextscn(elf, sec)) != NULL) { - char *str; - - gelf_getshdr(sec, shp); - str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); - if (!strcmp(name, str)) { - if (idx) - *idx = cnt; - break; - } - ++cnt; - } - - return sec; -} - -#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ - for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \ - idx < nr_entries; \ - ++idx, pos = gelf_getrel(reldata, idx, &pos_mem)) - -#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \ - for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \ - idx < nr_entries; \ - ++idx, pos = gelf_getrela(reldata, idx, &pos_mem)) - -/* - * We need to check if we have a .dynsym, so that we can handle the - * .plt, synthesizing its symbols, that aren't on the symtabs (be it - * .dynsym or .symtab). - * And always look at the original dso, not at debuginfo packages, that - * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). 
- */ -static int -dso__synthesize_plt_symbols(struct dso *dso, char *name, struct map *map, - symbol_filter_t filter) -{ - uint32_t nr_rel_entries, idx; - GElf_Sym sym; - u64 plt_offset; - GElf_Shdr shdr_plt; - struct symbol *f; - GElf_Shdr shdr_rel_plt, shdr_dynsym; - Elf_Data *reldata, *syms, *symstrs; - Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym; - size_t dynsym_idx; - GElf_Ehdr ehdr; - char sympltname[1024]; - Elf *elf; - int nr = 0, symidx, fd, err = 0; - - fd = open(name, O_RDONLY); - if (fd < 0) - goto out; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) - goto out_close; - - if (gelf_getehdr(elf, &ehdr) == NULL) - goto out_elf_end; - - scn_dynsym = elf_section_by_name(elf, &ehdr, &shdr_dynsym, - ".dynsym", &dynsym_idx); - if (scn_dynsym == NULL) - goto out_elf_end; - - scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, - ".rela.plt", NULL); - if (scn_plt_rel == NULL) { - scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt, - ".rel.plt", NULL); - if (scn_plt_rel == NULL) - goto out_elf_end; - } - - err = -1; - - if (shdr_rel_plt.sh_link != dynsym_idx) - goto out_elf_end; - - if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL) - goto out_elf_end; - - /* - * Fetch the relocation section to find the idxes to the GOT - * and the symbols in the .dynsym they refer to. 
- */ - reldata = elf_getdata(scn_plt_rel, NULL); - if (reldata == NULL) - goto out_elf_end; - - syms = elf_getdata(scn_dynsym, NULL); - if (syms == NULL) - goto out_elf_end; - - scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link); - if (scn_symstrs == NULL) - goto out_elf_end; - - symstrs = elf_getdata(scn_symstrs, NULL); - if (symstrs == NULL) - goto out_elf_end; - - nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; - plt_offset = shdr_plt.sh_offset; - - if (shdr_rel_plt.sh_type == SHT_RELA) { - GElf_Rela pos_mem, *pos; - - elf_section__for_each_rela(reldata, pos, pos_mem, idx, - nr_rel_entries) { - symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; - gelf_getsym(syms, symidx, &sym); - snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_sym__name(&sym, symstrs)); - - f = symbol__new(plt_offset, shdr_plt.sh_entsize, - STB_GLOBAL, sympltname); - if (!f) - goto out_elf_end; - - if (filter && filter(map, f)) - symbol__delete(f); - else { - symbols__insert(&dso->symbols[map->type], f); - ++nr; - } - } - } else if (shdr_rel_plt.sh_type == SHT_REL) { - GElf_Rel pos_mem, *pos; - elf_section__for_each_rel(reldata, pos, pos_mem, idx, - nr_rel_entries) { - symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; - gelf_getsym(syms, symidx, &sym); - snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_sym__name(&sym, symstrs)); - - f = symbol__new(plt_offset, shdr_plt.sh_entsize, - STB_GLOBAL, sympltname); - if (!f) - goto out_elf_end; - - if (filter && filter(map, f)) - symbol__delete(f); - else { - symbols__insert(&dso->symbols[map->type], f); - ++nr; - } - } - } - - err = 0; -out_elf_end: - elf_end(elf); -out_close: - close(fd); - - if (err == 0) - return nr; -out: - pr_debug("%s: problems reading %s PLT info.\n", - __func__, dso->long_name); - return 0; -} - -static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type) -{ - switch (type) { - case MAP__FUNCTION: - return elf_sym__is_function(sym); - case 
MAP__VARIABLE: - return elf_sym__is_object(sym); - default: - return false; - } -} - -static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs, - enum map_type type) -{ - switch (type) { - case MAP__FUNCTION: - return elf_sec__is_text(shdr, secstrs); - case MAP__VARIABLE: - return elf_sec__is_data(shdr, secstrs); - default: - return false; - } -} - -static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) -{ - Elf_Scn *sec = NULL; - GElf_Shdr shdr; - size_t cnt = 1; - - while ((sec = elf_nextscn(elf, sec)) != NULL) { - gelf_getshdr(sec, &shdr); - - if ((addr >= shdr.sh_addr) && - (addr < (shdr.sh_addr + shdr.sh_size))) - return cnt; - - ++cnt; - } - - return -1; -} - -static int dso__swap_init(struct dso *dso, unsigned char eidata) -{ - static unsigned int const endian = 1; - - dso->needs_swap = DSO_SWAP__NO; - - switch (eidata) { - case ELFDATA2LSB: - /* We are big endian, DSO is little endian. */ - if (*(unsigned char const *)&endian != 1) - dso->needs_swap = DSO_SWAP__YES; - break; - - case ELFDATA2MSB: - /* We are little endian, DSO is big endian. */ - if (*(unsigned char const *)&endian != 0) - dso->needs_swap = DSO_SWAP__YES; - break; - - default: - pr_err("unrecognized DSO data encoding %d\n", eidata); - return -EINVAL; - } - - return 0; -} - -static int dso__load_sym(struct dso *dso, struct map *map, const char *name, - int fd, symbol_filter_t filter, int kmodule, - int want_symtab) -{ - struct kmap *kmap = dso->kernel ? 
map__kmap(map) : NULL; - struct map *curr_map = map; - struct dso *curr_dso = dso; - Elf_Data *symstrs, *secstrs; - uint32_t nr_syms; - int err = -1; - uint32_t idx; - GElf_Ehdr ehdr; - GElf_Shdr shdr, opdshdr; - Elf_Data *syms, *opddata = NULL; - GElf_Sym sym; - Elf_Scn *sec, *sec_strndx, *opdsec; - Elf *elf; - int nr = 0; - size_t opdidx = 0; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) { - pr_debug("%s: cannot read %s ELF file.\n", __func__, name); - goto out_close; - } - - if (gelf_getehdr(elf, &ehdr) == NULL) { - pr_debug("%s: cannot get elf header.\n", __func__); - goto out_elf_end; - } - - if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) - goto out_elf_end; - - /* Always reject images with a mismatched build-id: */ - if (dso->has_build_id) { - u8 build_id[BUILD_ID_SIZE]; - - if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) - goto out_elf_end; - - if (!dso__build_id_equal(dso, build_id)) - goto out_elf_end; - } - - sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL); - if (sec == NULL) { - if (want_symtab) - goto out_elf_end; - - sec = elf_section_by_name(elf, &ehdr, &shdr, ".dynsym", NULL); - if (sec == NULL) - goto out_elf_end; - } - - opdsec = elf_section_by_name(elf, &ehdr, &opdshdr, ".opd", &opdidx); - if (opdshdr.sh_type != SHT_PROGBITS) - opdsec = NULL; - if (opdsec) - opddata = elf_rawdata(opdsec, NULL); - - syms = elf_getdata(sec, NULL); - if (syms == NULL) - goto out_elf_end; - - sec = elf_getscn(elf, shdr.sh_link); - if (sec == NULL) - goto out_elf_end; - - symstrs = elf_getdata(sec, NULL); - if (symstrs == NULL) - goto out_elf_end; - - sec_strndx = elf_getscn(elf, ehdr.e_shstrndx); - if (sec_strndx == NULL) - goto out_elf_end; - - secstrs = elf_getdata(sec_strndx, NULL); - if (secstrs == NULL) - goto out_elf_end; - - nr_syms = shdr.sh_size / shdr.sh_entsize; - - memset(&sym, 0, sizeof(sym)); - if (dso->kernel == DSO_TYPE_USER) { - dso->adjust_symbols = (ehdr.e_type == ET_EXEC || - 
elf_section_by_name(elf, &ehdr, &shdr, - ".gnu.prelink_undo", - NULL) != NULL); - } else { - dso->adjust_symbols = 0; - } - elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) { - struct symbol *f; - const char *elf_name = elf_sym__name(&sym, symstrs); - char *demangled = NULL; - int is_label = elf_sym__is_label(&sym); - const char *section_name; - - if (kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name && - strcmp(elf_name, kmap->ref_reloc_sym->name) == 0) - kmap->ref_reloc_sym->unrelocated_addr = sym.st_value; - - if (!is_label && !elf_sym__is_a(&sym, map->type)) - continue; - - /* Reject ARM ELF "mapping symbols": these aren't unique and - * don't identify functions, so will confuse the profile - * output: */ - if (ehdr.e_machine == EM_ARM) { - if (!strcmp(elf_name, "$a") || - !strcmp(elf_name, "$d") || - !strcmp(elf_name, "$t")) - continue; - } - - if (opdsec && sym.st_shndx == opdidx) { - u32 offset = sym.st_value - opdshdr.sh_addr; - u64 *opd = opddata->d_buf + offset; - sym.st_value = DSO__SWAP(dso, u64, *opd); - sym.st_shndx = elf_addr_to_index(elf, sym.st_value); - } - - sec = elf_getscn(elf, sym.st_shndx); - if (!sec) - goto out_elf_end; - - gelf_getshdr(sec, &shdr); - - if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type)) - continue; - - section_name = elf_sec__name(&shdr, secstrs); - - /* On ARM, symbols for thumb functions have 1 added to - * the symbol address as a flag - remove it */ - if ((ehdr.e_machine == EM_ARM) && - (map->type == MAP__FUNCTION) && - (sym.st_value & 1)) - --sym.st_value; - - if (dso->kernel != DSO_TYPE_USER || kmodule) { - char dso_name[PATH_MAX]; - - if (strcmp(section_name, - (curr_dso->short_name + - dso->short_name_len)) == 0) - goto new_symbol; - - if (strcmp(section_name, ".text") == 0) { - curr_map = map; - curr_dso = dso; - goto new_symbol; - } - - snprintf(dso_name, sizeof(dso_name), - "%s%s", dso->short_name, section_name); - - curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name); - if 
(curr_map == NULL) { - u64 start = sym.st_value; - - if (kmodule) - start += map->start + shdr.sh_offset; - - curr_dso = dso__new(dso_name); - if (curr_dso == NULL) - goto out_elf_end; - curr_dso->kernel = dso->kernel; - curr_dso->long_name = dso->long_name; - curr_dso->long_name_len = dso->long_name_len; - curr_map = map__new2(start, curr_dso, - map->type); - if (curr_map == NULL) { - dso__delete(curr_dso); - goto out_elf_end; - } - curr_map->map_ip = identity__map_ip; - curr_map->unmap_ip = identity__map_ip; - curr_dso->symtab_type = dso->symtab_type; - map_groups__insert(kmap->kmaps, curr_map); - dsos__add(&dso->node, curr_dso); - dso__set_loaded(curr_dso, map->type); - } else - curr_dso = curr_map->dso; - - goto new_symbol; - } - - if (curr_dso->adjust_symbols) { - pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " - "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, - (u64)sym.st_value, (u64)shdr.sh_addr, - (u64)shdr.sh_offset); - sym.st_value -= shdr.sh_addr - shdr.sh_offset; - } - /* - * We need to figure out if the object was created from C++ sources - * DWARF DW_compile_unit has this, but we don't always have access - * to it... - */ - demangled = bfd_demangle(NULL, elf_name, DMGL_PARAMS | DMGL_ANSI); - if (demangled != NULL) - elf_name = demangled; -new_symbol: - f = symbol__new(sym.st_value, sym.st_size, - GELF_ST_BIND(sym.st_info), elf_name); - free(demangled); - if (!f) - goto out_elf_end; - - if (filter && filter(curr_map, f)) - symbol__delete(f); - else { - symbols__insert(&curr_dso->symbols[curr_map->type], f); - nr++; - } - } - - /* - * For misannotated, zeroed, ASM function sizes. - */ - if (nr > 0) { - symbols__fixup_duplicate(&dso->symbols[map->type]); - symbols__fixup_end(&dso->symbols[map->type]); - if (kmap) { - /* - * We need to fixup this here too because we create new - * maps here, for things like vsyscall sections. 
- */ - __map_groups__fixup_end(kmap->kmaps, map->type); - } - } - err = nr; -out_elf_end: - elf_end(elf); -out_close: - return err; -} - -static bool dso__build_id_equal(const struct dso *dso, u8 *build_id) +bool dso__build_id_equal(const struct dso *dso, u8 *build_id) { return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0; } @@ -1480,216 +919,11 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) return have_build_id; } -/* - * Align offset to 4 bytes as needed for note name and descriptor data. - */ -#define NOTE_ALIGN(n) (((n) + 3) & -4U) - -static int elf_read_build_id(Elf *elf, void *bf, size_t size) -{ - int err = -1; - GElf_Ehdr ehdr; - GElf_Shdr shdr; - Elf_Data *data; - Elf_Scn *sec; - Elf_Kind ek; - void *ptr; - - if (size < BUILD_ID_SIZE) - goto out; - - ek = elf_kind(elf); - if (ek != ELF_K_ELF) - goto out; - - if (gelf_getehdr(elf, &ehdr) == NULL) { - pr_err("%s: cannot get elf header.\n", __func__); - goto out; - } - - /* - * Check following sections for notes: - * '.note.gnu.build-id' - * '.notes' - * '.note' (VDSO specific) - */ - do { - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".note.gnu.build-id", NULL); - if (sec) - break; - - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".notes", NULL); - if (sec) - break; - - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".note", NULL); - if (sec) - break; - - return err; - - } while (0); - - data = elf_getdata(sec, NULL); - if (data == NULL) - goto out; - - ptr = data->d_buf; - while (ptr < (data->d_buf + data->d_size)) { - GElf_Nhdr *nhdr = ptr; - size_t namesz = NOTE_ALIGN(nhdr->n_namesz), - descsz = NOTE_ALIGN(nhdr->n_descsz); - const char *name; - - ptr += sizeof(*nhdr); - name = ptr; - ptr += namesz; - if (nhdr->n_type == NT_GNU_BUILD_ID && - nhdr->n_namesz == sizeof("GNU")) { - if (memcmp(name, "GNU", sizeof("GNU")) == 0) { - size_t sz = min(size, descsz); - memcpy(bf, ptr, sz); - memset(bf + sz, 0, size - sz); - err = descsz; - break; - } - } - ptr += descsz; - 
} - -out: - return err; -} - -int filename__read_build_id(const char *filename, void *bf, size_t size) -{ - int fd, err = -1; - Elf *elf; - - if (size < BUILD_ID_SIZE) - goto out; - - fd = open(filename, O_RDONLY); - if (fd < 0) - goto out; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) { - pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); - goto out_close; - } - - err = elf_read_build_id(elf, bf, size); - - elf_end(elf); -out_close: - close(fd); -out: - return err; -} - -int sysfs__read_build_id(const char *filename, void *build_id, size_t size) -{ - int fd, err = -1; - - if (size < BUILD_ID_SIZE) - goto out; - - fd = open(filename, O_RDONLY); - if (fd < 0) - goto out; - - while (1) { - char bf[BUFSIZ]; - GElf_Nhdr nhdr; - size_t namesz, descsz; - - if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) - break; - - namesz = NOTE_ALIGN(nhdr.n_namesz); - descsz = NOTE_ALIGN(nhdr.n_descsz); - if (nhdr.n_type == NT_GNU_BUILD_ID && - nhdr.n_namesz == sizeof("GNU")) { - if (read(fd, bf, namesz) != (ssize_t)namesz) - break; - if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { - size_t sz = min(descsz, size); - if (read(fd, build_id, sz) == (ssize_t)sz) { - memset(build_id + sz, 0, size - sz); - err = 0; - break; - } - } else if (read(fd, bf, descsz) != (ssize_t)descsz) - break; - } else { - int n = namesz + descsz; - if (read(fd, bf, n) != n) - break; - } - } - close(fd); -out: - return err; -} - -static int filename__read_debuglink(const char *filename, - char *debuglink, size_t size) -{ - int fd, err = -1; - Elf *elf; - GElf_Ehdr ehdr; - GElf_Shdr shdr; - Elf_Data *data; - Elf_Scn *sec; - Elf_Kind ek; - - fd = open(filename, O_RDONLY); - if (fd < 0) - goto out; - - elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); - if (elf == NULL) { - pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename); - goto out_close; - } - - ek = elf_kind(elf); - if (ek != ELF_K_ELF) - goto out_close; - - if (gelf_getehdr(elf, &ehdr) == NULL) { - 
pr_err("%s: cannot get elf header.\n", __func__); - goto out_close; - } - - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".gnu_debuglink", NULL); - if (sec == NULL) - goto out_close; - - data = elf_getdata(sec, NULL); - if (data == NULL) - goto out_close; - - /* the start of this section is a zero-terminated string */ - strncpy(debuglink, data->d_buf, size); - - elf_end(elf); - -out_close: - close(fd); -out: - return err; -} - char dso__symtab_origin(const struct dso *dso) { static const char origin[] = { [DSO_BINARY_TYPE__KALLSYMS] = 'k', + [DSO_BINARY_TYPE__VMLINUX] = 'v', [DSO_BINARY_TYPE__JAVA_JIT] = 'j', [DSO_BINARY_TYPE__DEBUGLINK] = 'l', [DSO_BINARY_TYPE__BUILD_ID_CACHE] = 'B', @@ -1700,6 +934,7 @@ char dso__symtab_origin(const struct dso *dso) [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE] = 'K', [DSO_BINARY_TYPE__GUEST_KALLSYMS] = 'g', [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G', + [DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V', }; if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND) @@ -1775,7 +1010,9 @@ int dso__binary_type_file(struct dso *dso, enum dso_binary_type type, default: case DSO_BINARY_TYPE__KALLSYMS: + case DSO_BINARY_TYPE__VMLINUX: case DSO_BINARY_TYPE__GUEST_KALLSYMS: + case DSO_BINARY_TYPE__GUEST_VMLINUX: case DSO_BINARY_TYPE__JAVA_JIT: case DSO_BINARY_TYPE__NOT_FOUND: ret = -1; @@ -1789,11 +1026,12 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) { char *name; int ret = -1; - int fd; u_int i; struct machine *machine; char *root_dir = (char *) ""; - int want_symtab; + int ss_pos = 0; + struct symsrc ss_[2]; + struct symsrc *syms_ss = NULL, *runtime_ss = NULL; dso__set_loaded(dso, map->type); @@ -1835,54 +1073,69 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) root_dir = machine->root_dir; /* Iterate over candidate debug images. - * On the first pass, only load images if they have a full symtab. 
- * Failing that, do a second pass where we accept .dynsym also + * Keep track of "interesting" ones (those which have a symtab, dynsym, + * and/or opd section) for processing. */ - want_symtab = 1; -restart: for (i = 0; i < DSO_BINARY_TYPE__SYMTAB_CNT; i++) { + struct symsrc *ss = &ss_[ss_pos]; + bool next_slot = false; - dso->symtab_type = binary_type_symtab[i]; + enum dso_binary_type symtab_type = binary_type_symtab[i]; - if (dso__binary_type_file(dso, dso->symtab_type, + if (dso__binary_type_file(dso, symtab_type, root_dir, name, PATH_MAX)) continue; /* Name is now the name of the next image to try */ - fd = open(name, O_RDONLY); - if (fd < 0) + if (symsrc__init(ss, dso, name, symtab_type) < 0) continue; - ret = dso__load_sym(dso, map, name, fd, filter, 0, - want_symtab); - close(fd); + if (!syms_ss && symsrc__has_symtab(ss)) { + syms_ss = ss; + next_slot = true; + } - /* - * Some people seem to have debuginfo files _WITHOUT_ debug - * info!?!? - */ - if (!ret) - continue; + if (!runtime_ss && symsrc__possibly_runtime(ss)) { + runtime_ss = ss; + next_slot = true; + } - if (ret > 0) { - int nr_plt; + if (next_slot) { + ss_pos++; - nr_plt = dso__synthesize_plt_symbols(dso, name, map, filter); - if (nr_plt > 0) - ret += nr_plt; - break; + if (syms_ss && runtime_ss) + break; } + } - /* - * If we wanted a full symtab but no image had one, - * relax our requirements and repeat the search. 
- */ - if (ret <= 0 && want_symtab) { - want_symtab = 0; - goto restart; + if (!runtime_ss && !syms_ss) + goto out_free; + + if (runtime_ss && !syms_ss) { + syms_ss = runtime_ss; + } + + /* We'll have to hope for the best */ + if (!runtime_ss && syms_ss) + runtime_ss = syms_ss; + + if (syms_ss) + ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, 0); + else + ret = -1; + + if (ret > 0) { + int nr_plt; + + nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map, filter); + if (nr_plt > 0) + ret += nr_plt; } + for (; ss_pos > 0; ss_pos--) + symsrc__destroy(&ss_[ss_pos - 1]); +out_free: free(name); if (ret < 0 && strstr(dso->name, " (deleted)") != NULL) return 0; @@ -2030,25 +1283,6 @@ static int machine__set_modules_path(struct machine *machine) return map_groups__set_modules_path_dir(&machine->kmaps, modules_path); } -/* - * Constructor variant for modules (where we know from /proc/modules where - * they are loaded) and for vmlinux, where only after we load all the - * symbols we'll know where it starts and ends. - */ -static struct map *map__new2(u64 start, struct dso *dso, enum map_type type) -{ - struct map *map = calloc(1, (sizeof(*map) + - (dso->kernel ? 
sizeof(struct kmap) : 0))); - if (map != NULL) { - /* - * ->end will be filled after we load all the symbols - */ - map__init(map, type, start, 0, 0, dso); - } - - return map; -} - struct map *machine__new_module(struct machine *machine, u64 start, const char *filename) { @@ -2141,22 +1375,30 @@ out_failure: int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, symbol_filter_t filter) { - int err = -1, fd; + int err = -1; + struct symsrc ss; char symfs_vmlinux[PATH_MAX]; + enum dso_binary_type symtab_type; snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s%s", symbol_conf.symfs, vmlinux); - fd = open(symfs_vmlinux, O_RDONLY); - if (fd < 0) + + if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX; + else + symtab_type = DSO_BINARY_TYPE__VMLINUX; + + if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type)) return -1; - dso__set_long_name(dso, (char *)vmlinux); - dso__set_loaded(dso, map->type); - err = dso__load_sym(dso, map, symfs_vmlinux, fd, filter, 0, 0); - close(fd); + err = dso__load_sym(dso, map, &ss, &ss, filter, 0); + symsrc__destroy(&ss); - if (err > 0) + if (err > 0) { + dso__set_long_name(dso, (char *)vmlinux); + dso__set_loaded(dso, map->type); pr_debug("Using %s for symbols\n", symfs_vmlinux); + } return err; } @@ -2173,10 +1415,8 @@ int dso__load_vmlinux_path(struct dso *dso, struct map *map, filename = dso__build_id_filename(dso, NULL, 0); if (filename != NULL) { err = dso__load_vmlinux(dso, map, filename, filter); - if (err > 0) { - dso__set_long_name(dso, filename); + if (err > 0) goto out; - } free(filename); } @@ -2291,9 +1531,8 @@ do_kallsyms: free(kallsyms_allocated_filename); if (err > 0) { + dso__set_long_name(dso, strdup("[kernel.kallsyms]")); out_fixup: - if (kallsyms_filename != NULL) - dso__set_long_name(dso, strdup("[kernel.kallsyms]")); map__fixup_start(map); map__fixup_end(map); } @@ -2352,7 +1591,7 @@ out_try_fixup: return err; } -static void dsos__add(struct list_head *head, 
struct dso *dso) +void dsos__add(struct list_head *head, struct dso *dso) { list_add_tail(&dso->node, head); } @@ -2516,7 +1755,7 @@ struct process_args { }; static int symbol__in_kernel(void *arg, const char *name, - char type __used, u64 start, u64 end __used) + char type __used, u64 start) { struct process_args *args = arg; @@ -2754,7 +1993,8 @@ int symbol__init(void) symbol_conf.priv_size = ALIGN(symbol_conf.priv_size, sizeof(u64)); - elf_version(EV_CURRENT); + symbol__elf_init(); + if (symbol_conf.sort_by_name) symbol_conf.priv_size += (sizeof(struct symbol_name_rb_node) - sizeof(struct symbol)); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 1fe733a1e21f..fc4b1e630fd9 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -11,6 +11,12 @@ #include <stdio.h> #include <byteswap.h> +#ifndef NO_LIBELF_SUPPORT +#include <libelf.h> +#include <gelf.h> +#include <elf.h> +#endif + #ifdef HAVE_CPLUS_DEMANGLE extern char *cplus_demangle(const char *, int); @@ -158,6 +164,8 @@ struct addr_location { enum dso_binary_type { DSO_BINARY_TYPE__KALLSYMS = 0, DSO_BINARY_TYPE__GUEST_KALLSYMS, + DSO_BINARY_TYPE__VMLINUX, + DSO_BINARY_TYPE__GUEST_VMLINUX, DSO_BINARY_TYPE__JAVA_JIT, DSO_BINARY_TYPE__DEBUGLINK, DSO_BINARY_TYPE__BUILD_ID_CACHE, @@ -217,6 +225,36 @@ struct dso { char name[0]; }; +struct symsrc { + char *name; + int fd; + enum dso_binary_type type; + +#ifndef NO_LIBELF_SUPPORT + Elf *elf; + GElf_Ehdr ehdr; + + Elf_Scn *opdsec; + size_t opdidx; + GElf_Shdr opdshdr; + + Elf_Scn *symtab; + GElf_Shdr symshdr; + + Elf_Scn *dynsym; + size_t dynsym_idx; + GElf_Shdr dynshdr; + + bool adjust_symbols; +#endif +}; + +void symsrc__destroy(struct symsrc *ss); +int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, + enum dso_binary_type type); +bool symsrc__has_symtab(struct symsrc *ss); +bool symsrc__possibly_runtime(struct symsrc *ss); + #define DSO__SWAP(dso, type, val) \ ({ \ type ____r = val; \ @@ -254,6 +292,7 @@ static 
inline void dso__set_loaded(struct dso *dso, enum map_type type) void dso__sort_by_name(struct dso *dso, enum map_type type); +void dsos__add(struct list_head *head, struct dso *dso); struct dso *__dsos__findnew(struct list_head *head, const char *name); int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter); @@ -283,6 +322,7 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp); char dso__symtab_origin(const struct dso *dso); void dso__set_long_name(struct dso *dso, char *name); void dso__set_build_id(struct dso *dso, void *build_id); +bool dso__build_id_equal(const struct dso *dso, u8 *build_id); void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine); struct map *dso__new_map(const char *name); @@ -297,7 +337,9 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits); int build_id__sprintf(const u8 *build_id, int len, char *bf); int kallsyms__parse(const char *filename, void *arg, int (*process_symbol)(void *arg, const char *name, - char type, u64 start, u64 end)); + char type, u64 start)); +int filename__read_debuglink(const char *filename, char *debuglink, + size_t size); void machine__destroy_kernel_maps(struct machine *machine); int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel); @@ -309,6 +351,8 @@ void machines__destroy_guest_kernel_maps(struct rb_root *machines); int symbol__init(void); void symbol__exit(void); +void symbol__elf_init(void); +struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); size_t symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, FILE *fp); size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); @@ -326,4 +370,15 @@ ssize_t dso__data_read_addr(struct dso *dso, struct map *map, struct machine *machine, u64 addr, u8 *data, ssize_t size); int dso__test_data(void); +int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, + struct symsrc *runtime_ss, 
symbol_filter_t filter, + int kmodule); +int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, + struct map *map, symbol_filter_t filter); + +void symbols__insert(struct rb_root *symbols, struct symbol *sym); +void symbols__fixup_duplicate(struct rb_root *symbols); +void symbols__fixup_end(struct rb_root *symbols); +void __map_groups__fixup_end(struct map_groups *mg, enum map_type type); + #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 7eeebcee291c..884dde9b9bc1 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -58,8 +58,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) } if (top->evlist->nr_entries == 1) { - struct perf_evsel *first; - first = list_entry(top->evlist->entries.next, struct perf_evsel, node); + struct perf_evsel *first = perf_evlist__first(top->evlist); ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ", (uint64_t)first->attr.sample_period, top->freq ? "Hz" : ""); diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 0715c843c2e7..a5a554efeb50 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -162,25 +162,16 @@ int trace_parse_common_pid(struct pevent *pevent, void *data) return pevent_data_pid(pevent, &record); } -unsigned long long read_size(struct pevent *pevent, void *ptr, int size) +unsigned long long read_size(struct event_format *event, void *ptr, int size) { - return pevent_read_number(pevent, ptr, size); + return pevent_read_number(event->pevent, ptr, size); } -void print_trace_event(struct pevent *pevent, int cpu, void *data, int size) +void event_format__print(struct event_format *event, + int cpu, void *data, int size) { - struct event_format *event; struct pevent_record record; struct trace_seq s; - int type; - - type = trace_parse_common_type(pevent, data); - - event = pevent_find_event(pevent, type); - if (!event) { - warning("ug! 
no event found for type %d", type); - return; - } memset(&record, 0, sizeof(record)); record.cpu = cpu; @@ -192,6 +183,19 @@ void print_trace_event(struct pevent *pevent, int cpu, void *data, int size) trace_seq_do_printf(&s); } +void print_trace_event(struct pevent *pevent, int cpu, void *data, int size) +{ + int type = trace_parse_common_type(pevent, data); + struct event_format *event = pevent_find_event(pevent, type); + + if (!event) { + warning("ug! no event found for type %d", type); + return; + } + + event_format__print(event, cpu, data, size); +} + void print_event(struct pevent *pevent, int cpu, void *data, int size, unsigned long long nsecs, char *comm) { @@ -289,7 +293,7 @@ struct event_format *trace_find_next_event(struct pevent *pevent, { static int idx; - if (!pevent->events) + if (!pevent || !pevent->events) return NULL; if (!event) { diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 474aa7a7df43..302ff262494c 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -36,11 +36,10 @@ static int stop_script_unsupported(void) } static void process_event_unsupported(union perf_event *event __unused, - struct pevent *pevent __unused, struct perf_sample *sample __unused, struct perf_evsel *evsel __unused, struct machine *machine __unused, - struct thread *thread __unused) + struct addr_location *al __unused) { } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 8fef1d6687b7..a55fd37ffea1 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -9,7 +9,6 @@ struct machine; struct perf_sample; union perf_event; struct perf_tool; -struct thread; extern int header_page_size_size; extern int header_page_ts_size; @@ -32,6 +31,8 @@ int bigendian(void); struct pevent *read_trace_init(int file_bigendian, int host_bigendian); void print_trace_event(struct pevent *pevent, int cpu, void *data, int size); +void 
event_format__print(struct event_format *event, + int cpu, void *data, int size); void print_event(struct pevent *pevent, int cpu, void *data, int size, unsigned long long nsecs, char *comm); @@ -56,7 +57,7 @@ int trace_parse_common_pid(struct pevent *pevent, void *data); struct event_format *trace_find_next_event(struct pevent *pevent, struct event_format *event); -unsigned long long read_size(struct pevent *pevent, void *ptr, int size); +unsigned long long read_size(struct event_format *event, void *ptr, int size); unsigned long long eval_flag(const char *flag); struct pevent_record *trace_read_data(struct pevent *pevent, int cpu); @@ -74,16 +75,19 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs, void tracing_data_put(struct tracing_data *tdata); +struct addr_location; + +struct perf_session; + struct scripting_ops { const char *name; int (*start_script) (const char *script, int argc, const char **argv); int (*stop_script) (void); void (*process_event) (union perf_event *event, - struct pevent *pevent, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine, - struct thread *thread); + struct addr_location *al); int (*generate_script) (struct pevent *pevent, const char *outfile); }; diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c new file mode 100644 index 000000000000..00a42aa8d5c1 --- /dev/null +++ b/tools/perf/util/unwind.c @@ -0,0 +1,567 @@ +/* + * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps. 
+ * + * Lots of this code have been borrowed or heavily inspired from parts of + * the libunwind 0.99 code which are (amongst other contributors I may have + * forgotten): + * + * Copyright (C) 2002-2007 Hewlett-Packard Co + * Contributed by David Mosberger-Tang <davidm@hpl.hp.com> + * + * And the bugs have been added by: + * + * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com> + * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com> + * + */ + +#include <elf.h> +#include <gelf.h> +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <linux/list.h> +#include <libunwind.h> +#include <libunwind-ptrace.h> +#include "thread.h" +#include "session.h" +#include "perf_regs.h" +#include "unwind.h" +#include "util.h" + +extern int +UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as, + unw_word_t ip, + unw_dyn_info_t *di, + unw_proc_info_t *pi, + int need_unwind_info, void *arg); + +#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table) + +#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */ +#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */ + +/* Pointer-encoding formats: */ +#define DW_EH_PE_omit 0xff +#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */ +#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */ +#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */ +#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */ +#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */ + +/* Pointer-encoding application: */ +#define DW_EH_PE_absptr 0x00 /* absolute value */ +#define DW_EH_PE_pcrel 0x10 /* rel. to addr. 
of encoded value */ + +/* + * The following are not documented by LSB v1.3, yet they are used by + * GCC, presumably they aren't documented by LSB since they aren't + * used on Linux: + */ +#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ +#define DW_EH_PE_aligned 0x50 /* aligned pointer */ + +/* Flags intentionaly not handled, since they're not needed: + * #define DW_EH_PE_indirect 0x80 + * #define DW_EH_PE_uleb128 0x01 + * #define DW_EH_PE_udata2 0x02 + * #define DW_EH_PE_sleb128 0x09 + * #define DW_EH_PE_sdata2 0x0a + * #define DW_EH_PE_textrel 0x20 + * #define DW_EH_PE_datarel 0x30 + */ + +struct unwind_info { + struct perf_sample *sample; + struct machine *machine; + struct thread *thread; + u64 sample_uregs; +}; + +#define dw_read(ptr, type, end) ({ \ + type *__p = (type *) ptr; \ + type __v; \ + if ((__p + 1) > (type *) end) \ + return -EINVAL; \ + __v = *__p++; \ + ptr = (typeof(ptr)) __p; \ + __v; \ + }) + +static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val, + u8 encoding) +{ + u8 *cur = *p; + *val = 0; + + switch (encoding) { + case DW_EH_PE_omit: + *val = 0; + goto out; + case DW_EH_PE_ptr: + *val = dw_read(cur, unsigned long, end); + goto out; + default: + break; + } + + switch (encoding & DW_EH_PE_APPL_MASK) { + case DW_EH_PE_absptr: + break; + case DW_EH_PE_pcrel: + *val = (unsigned long) cur; + break; + default: + return -EINVAL; + } + + if ((encoding & 0x07) == 0x00) + encoding |= DW_EH_PE_udata4; + + switch (encoding & DW_EH_PE_FORMAT_MASK) { + case DW_EH_PE_sdata4: + *val += dw_read(cur, s32, end); + break; + case DW_EH_PE_udata4: + *val += dw_read(cur, u32, end); + break; + case DW_EH_PE_sdata8: + *val += dw_read(cur, s64, end); + break; + case DW_EH_PE_udata8: + *val += dw_read(cur, u64, end); + break; + default: + return -EINVAL; + } + + out: + *p = cur; + return 0; +} + +#define dw_read_encoded_value(ptr, end, enc) ({ \ + u64 __v; \ + if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \ + return -EINVAL; \ + } \ + 
__v; \ + }) + +static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, + GElf_Shdr *shp, const char *name) +{ + Elf_Scn *sec = NULL; + + while ((sec = elf_nextscn(elf, sec)) != NULL) { + char *str; + + gelf_getshdr(sec, shp); + str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); + if (!strcmp(name, str)) + break; + } + + return sec; +} + +static u64 elf_section_offset(int fd, const char *name) +{ + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + u64 offset = 0; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + return 0; + + do { + if (gelf_getehdr(elf, &ehdr) == NULL) + break; + + if (!elf_section_by_name(elf, &ehdr, &shdr, name)) + break; + + offset = shdr.sh_offset; + } while (0); + + elf_end(elf); + return offset; +} + +struct table_entry { + u32 start_ip_offset; + u32 fde_offset; +}; + +struct eh_frame_hdr { + unsigned char version; + unsigned char eh_frame_ptr_enc; + unsigned char fde_count_enc; + unsigned char table_enc; + + /* + * The rest of the header is variable-length and consists of the + * following members: + * + * encoded_t eh_frame_ptr; + * encoded_t fde_count; + */ + + /* A single encoded pointer should not be more than 8 bytes. */ + u64 enc[2]; + + /* + * struct { + * encoded_t start_ip; + * encoded_t fde_addr; + * } binary_search_table[fde_count]; + */ + char data[0]; +} __packed; + +static int unwind_spec_ehframe(struct dso *dso, struct machine *machine, + u64 offset, u64 *table_data, u64 *segbase, + u64 *fde_count) +{ + struct eh_frame_hdr hdr; + u8 *enc = (u8 *) &hdr.enc; + u8 *end = (u8 *) &hdr.data; + ssize_t r; + + r = dso__data_read_offset(dso, machine, offset, + (u8 *) &hdr, sizeof(hdr)); + if (r != sizeof(hdr)) + return -EINVAL; + + /* We dont need eh_frame_ptr, just skip it. 
*/ + dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc); + + *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc); + *segbase = offset; + *table_data = (enc - (u8 *) &hdr) + offset; + return 0; +} + +static int read_unwind_spec(struct dso *dso, struct machine *machine, + u64 *table_data, u64 *segbase, u64 *fde_count) +{ + int ret = -EINVAL, fd; + u64 offset; + + fd = dso__data_fd(dso, machine); + if (fd < 0) + return -EINVAL; + + offset = elf_section_offset(fd, ".eh_frame_hdr"); + close(fd); + + if (offset) + ret = unwind_spec_ehframe(dso, machine, offset, + table_data, segbase, + fde_count); + + /* TODO .debug_frame check if eh_frame_hdr fails */ + return ret; +} + +static struct map *find_map(unw_word_t ip, struct unwind_info *ui) +{ + struct addr_location al; + + thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER, + MAP__FUNCTION, ip, &al); + return al.map; +} + +static int +find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, + int need_unwind_info, void *arg) +{ + struct unwind_info *ui = arg; + struct map *map; + unw_dyn_info_t di; + u64 table_data, segbase, fde_count; + + map = find_map(ip, ui); + if (!map || !map->dso) + return -EINVAL; + + pr_debug("unwind: find_proc_info dso %s\n", map->dso->name); + + if (read_unwind_spec(map->dso, ui->machine, + &table_data, &segbase, &fde_count)) + return -EINVAL; + + memset(&di, 0, sizeof(di)); + di.format = UNW_INFO_FORMAT_REMOTE_TABLE; + di.start_ip = map->start; + di.end_ip = map->end; + di.u.rti.segbase = map->start + segbase; + di.u.rti.table_data = map->start + table_data; + di.u.rti.table_len = fde_count * sizeof(struct table_entry) + / sizeof(unw_word_t); + return dwarf_search_unwind_table(as, ip, &di, pi, + need_unwind_info, arg); +} + +static int access_fpreg(unw_addr_space_t __used as, unw_regnum_t __used num, + unw_fpreg_t __used *val, int __used __write, + void __used *arg) +{ + pr_err("unwind: access_fpreg unsupported\n"); + return -UNW_EINVAL; +} + 
+/*
+ * libunwind accessor: address of the dynamic unwind-info list.
+ * Always answers -UNW_ENOINFO, i.e. no such list is provided.
+ */
+static int get_dyn_info_list_addr(unw_addr_space_t __used as,
+				  unw_word_t __used *dil_addr,
+				  void __used *arg)
+{
+	return -UNW_ENOINFO;
+}
+
+/*
+ * libunwind accessor: resume execution at a cursor.
+ * Not supported here; logs an error and returns -UNW_EINVAL.
+ */
+static int resume(unw_addr_space_t __used as, unw_cursor_t __used *cu,
+		  void __used *arg)
+{
+	pr_err("unwind: resume unsupported\n");
+	return -UNW_EINVAL;
+}
+
+/*
+ * libunwind accessor: look up a procedure name.
+ * Not supported here; logs an error and returns -UNW_EINVAL.
+ */
+static int
+get_proc_name(unw_addr_space_t __used as, unw_word_t __used addr,
+		char __used *bufp, size_t __used buf_len,
+		unw_word_t __used *offp, void __used *arg)
+{
+	pr_err("unwind: get_proc_name unsupported\n");
+	return -UNW_EINVAL;
+}
+
+/*
+ * Read one unw_word_t at @addr from the DSO backing the map that covers
+ * @addr in the sampled thread.
+ *
+ * Returns 0 when a full word was read into @data, -1 when no map or no
+ * dso covers @addr, and 1 when dso__data_read_addr() returned anything
+ * other than sizeof(*data).
+ */
+static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
+			  unw_word_t *data)
+{
+	struct addr_location al;
+	ssize_t size;
+
+	thread__find_addr_map(ui->thread, ui->machine, PERF_RECORD_MISC_USER,
+			      MAP__FUNCTION, addr, &al);
+	if (!al.map) {
+		pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+		return -1;
+	}
+
+	if (!al.map->dso)
+		return -1;
+
+	size = dso__data_read_addr(al.map->dso, al.map, ui->machine,
+				   addr, (u8 *) data, sizeof(*data));
+
+	return !(size == sizeof(*data));
+}
+
+/*
+ * Fetch the sampled value of register @id.
+ *
+ * @sample_regs is a bit mask of the registers present in @regs; the
+ * value for @id is stored at the index equal to the number of mask bits
+ * set below @id.
+ *
+ * Returns 0 and stores the value in @valp, or -EINVAL when @id is out
+ * of range for the mask or its bit is not set.
+ */
+static int reg_value(unw_word_t *valp, struct regs_dump *regs, int id,
+		     u64 sample_regs)
+{
+	int i, idx = 0;
+
+	/* Shifting by a negative or >= 64 count would be undefined. */
+	if (id < 0 || id >= (int)(sizeof(sample_regs) * 8))
+		return -EINVAL;
+
+	/* The mask is 64 bits wide: shift a 64-bit one, not an int. */
+	if (!(sample_regs & (1ULL << id)))
+		return -EINVAL;
+
+	for (i = 0; i < id; i++) {
+		if (sample_regs & (1ULL << i))
+			idx++;
+	}
+
+	*valp = regs->regs[idx];
+	return 0;
+}
+
+/*
+ * libunwind accessor: read one word of target memory at @addr.
+ *
+ * Addresses inside the sampled user stack dump (which starts at the
+ * sampled stack pointer and is stack->size bytes long) are served from
+ * the dump; everything else is looked up in the mapped DSOs through
+ * access_dso_mem().
+ *
+ * Writes, and samples without user registers, yield *valp = 0 and a
+ * return value of 0.  Otherwise returns 0 on success, -EINVAL on
+ * address wrap-around, or the error from reg_value()/access_dso_mem().
+ */
+static int access_mem(unw_addr_space_t __used as,
+		      unw_word_t addr, unw_word_t *valp,
+		      int __write, void *arg)
+{
+	struct unwind_info *ui = arg;
+	struct stack_dump *stack = &ui->sample->user_stack;
+	unw_word_t start, end;
+	int offset;
+	int ret;
+
+	/* Don't support write, probably not needed. */
+	if (__write || !stack || !ui->sample->user_regs.regs) {
+		*valp = 0;
+		return 0;
+	}
+
+	ret = reg_value(&start, &ui->sample->user_regs, PERF_REG_SP,
+			ui->sample_uregs);
+	if (ret)
+		return ret;
+
+	end = start + stack->size;
+
+	/* Check overflow. */
+	if (addr + sizeof(unw_word_t) < addr)
+		return -EINVAL;
+
+	/*
+	 * NOTE(review): '>=' also sends the very last word of the dump
+	 * (addr + sizeof(word) == end) to the DSO path; '>' looks like the
+	 * intended bound - confirm before changing.
+	 */
+	if (addr < start || addr + sizeof(unw_word_t) >= end) {
+		ret = access_dso_mem(ui, addr, valp);
+		if (ret) {
+			pr_debug("unwind: access_mem %p not inside range %p-%p\n",
+				(void *)addr, (void *)start, (void *)end);
+			*valp = 0;
+			return ret;
+		}
+		return 0;
+	}
+
+	offset = addr - start;
+	/* Copy instead of casting: the dump offset need not be word aligned. */
+	memcpy(valp, &stack->data[offset], sizeof(*valp));
+	pr_debug("unwind: access_mem addr %p, val %lx, offset %d\n",
+		 (void *)addr, (unsigned long)*valp, offset);
+	return 0;
+}
+
+/*
+ * libunwind accessor: read register @regnum from the sample.
+ *
+ * The libunwind register number is translated with
+ * unwind__arch_reg_id() and then looked up with reg_value().
+ * Writes are logged and ignored (return 0).  A sample without user
+ * registers yields *valp = 0 and 0.  Returns -EINVAL when the register
+ * cannot be translated, or the reg_value() error.
+ */
+static int access_reg(unw_addr_space_t __used as,
+		      unw_regnum_t regnum, unw_word_t *valp,
+		      int __write, void *arg)
+{
+	struct unwind_info *ui = arg;
+	int id, ret;
+
+	/* Don't support write, I suspect we don't need it. */
+	if (__write) {
+		pr_err("unwind: access_reg w %d\n", regnum);
+		return 0;
+	}
+
+	if (!ui->sample->user_regs.regs) {
+		*valp = 0;
+		return 0;
+	}
+
+	id = unwind__arch_reg_id(regnum);
+	if (id < 0)
+		return -EINVAL;
+
+	ret = reg_value(valp, &ui->sample->user_regs, id, ui->sample_uregs);
+	if (ret) {
+		pr_err("unwind: can't read reg %d\n", regnum);
+		return ret;
+	}
+
+	pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp);
+	return 0;
+}
+
+/*
+ * libunwind accessor: release unwind info handed out by find_proc_info.
+ * Nothing to free here; only emits a debug message.
+ */
+static void put_unwind_info(unw_addr_space_t __used as,
+			    unw_proc_info_t *pi __used,
+			    void *arg __used)
+{
+	pr_debug("unwind: put_unwind_info called\n");
+}
+
+/*
+ * Resolve @ip to a map and symbol in @thread and pass the resulting
+ * unwind_entry to @cb.  Returns whatever @cb returns.
+ */
+static int entry(u64 ip, struct thread *thread, struct machine *machine,
+		 unwind_entry_cb_t cb, void *arg)
+{
+	struct unwind_entry e;
+	struct addr_location al;
+
+	thread__find_addr_location(thread, machine,
+				   PERF_RECORD_MISC_USER,
+				   MAP__FUNCTION, ip, &al, NULL);
+
+	e.ip = ip;
+	e.map = al.map;
+	e.sym = al.sym;
+
+	pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
+		 al.sym ? al.sym->name : "''",
+		 ip,
+		 al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
+
+	return cb(&e, arg);
+}
+
+/*
+ * Print a message for a libunwind error code.
+ *
+ * The accessors in this file return negated UNW_E* values and libunwind
+ * reports its own failures the same way, so the sign is normalised
+ * before matching; a positive code is still accepted.  Codes without a
+ * dedicated message are silently ignored.
+ */
+static void display_error(int err)
+{
+	switch (err < 0 ? -err : err) {
+	case UNW_EINVAL:
+		pr_err("unwind: Only supports local.\n");
+		break;
+	case UNW_EUNSPEC:
+		pr_err("unwind: Unspecified error.\n");
+		break;
+	case UNW_EBADREG:
+		pr_err("unwind: Register unavailable.\n");
+		break;
+	default:
+		break;
+	}
+}
+
+/* Callback table handed to unw_create_addr_space() in get_entries(). */
+static unw_accessors_t accessors = {
+	.find_proc_info		= find_proc_info,
+	.put_unwind_info	= put_unwind_info,
+	.get_dyn_info_list_addr	= get_dyn_info_list_addr,
+	.access_mem		= access_mem,
+	.access_reg		= access_reg,
+	.access_fpreg		= access_fpreg,
+	.resume			= resume,
+	.get_proc_name		= get_proc_name,
+};
+
+/*
+ * Walk the call chain described by @ui, calling @cb (through entry())
+ * for the instruction pointer of every frame unw_step() reaches.
+ *
+ * Returns -ENOMEM when the address space cannot be created, the
+ * unw_init_remote() error when cursor setup fails, otherwise the result
+ * of the last entry() call (0 when no frame was stepped).  A non-zero
+ * result from entry() stops the walk.
+ */
+static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
+		       void *arg)
+{
+	unw_addr_space_t addr_space;
+	unw_cursor_t c;
+	int ret;
+
+	addr_space = unw_create_addr_space(&accessors, 0);
+	if (!addr_space) {
+		pr_err("unwind: Can't create unwind address space.\n");
+		return -ENOMEM;
+	}
+
+	ret = unw_init_remote(&c, addr_space, ui);
+	if (ret)
+		display_error(ret);
+
+	while (!ret && (unw_step(&c) > 0)) {
+		unw_word_t ip;
+
+		unw_get_reg(&c, UNW_REG_IP, &ip);
+		ret = entry(ip, ui->thread, ui->machine, cb, arg);
+	}
+
+	unw_destroy_addr_space(addr_space);
+	return ret;
+}
+
+/*
+ * Unwind the user stack captured in sample @data and report every frame
+ * to @cb.
+ *
+ * The first entry reported is the sampled instruction pointer itself
+ * (PERF_REG_IP looked up in @sample_uregs); the remaining ones come
+ * from get_entries().
+ *
+ * Returns -EINVAL when the sample carries no user registers, the
+ * reg_value() error when the IP is not available, -ENOMEM when @cb
+ * rejects the first entry, otherwise the result of get_entries().
+ */
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+			struct machine *machine, struct thread *thread,
+			u64 sample_uregs, struct perf_sample *data)
+{
+	unw_word_t ip;
+	struct unwind_info ui = {
+		.sample       = data,
+		.sample_uregs = sample_uregs,
+		.thread       = thread,
+		.machine      = machine,
+	};
+	int ret;
+
+	if (!data->user_regs.regs)
+		return -EINVAL;
+
+	ret = reg_value(&ip, &data->user_regs, PERF_REG_IP, sample_uregs);
+	if (ret)
+		return ret;
+
+	ret = entry(ip, thread, machine, cb, arg);
+	if (ret)
+		return -ENOMEM;
+
+	return get_entries(&ui, cb, arg);
+}
diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h
new file mode 100644
index 000000000000..919bd6ad8501
--- /dev/null
+++ 
b/tools/perf/util/unwind.h @@ -0,0 +1,34 @@ +#ifndef __UNWIND_H +#define __UNWIND_H + +#include "types.h" +#include "event.h" +#include "symbol.h" + +struct unwind_entry { + struct map *map; + struct symbol *sym; + u64 ip; +}; + +typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg); + +#ifndef NO_LIBUNWIND_SUPPORT +int unwind__get_entries(unwind_entry_cb_t cb, void *arg, + struct machine *machine, + struct thread *thread, + u64 sample_uregs, + struct perf_sample *data); +int unwind__arch_reg_id(int regnum); +#else +static inline int +unwind__get_entries(unwind_entry_cb_t cb __used, void *arg __used, + struct machine *machine __used, + struct thread *thread __used, + u64 sample_uregs __used, + struct perf_sample *data __used) +{ + return 0; +} +#endif /* NO_LIBUNWIND_SUPPORT */ +#endif /* __UNWIND_H */ diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index d03599fbe78b..1b8775c3707d 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -1,6 +1,9 @@ #include "../perf.h" #include "util.h" #include <sys/mman.h> +#include <execinfo.h> +#include <stdio.h> +#include <stdlib.h> /* * XXX We need to find a better place for these things... @@ -158,3 +161,19 @@ size_t hex_width(u64 v) return n; } + +/* Obtain a backtrace and print it to stdout. 
*/ +void dump_stack(void) +{ + void *array[16]; + size_t size = backtrace(array, ARRAY_SIZE(array)); + char **strings = backtrace_symbols(array, size); + size_t i; + + printf("Obtained %zd stack frames.\n", size); + + for (i = 0; i < size; i++) + printf("%s\n", strings[i]); + + free(strings); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b13c7331eaf8..00a93a91a235 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -266,4 +266,6 @@ size_t hex_width(u64 v); char *rtrim(char *s); +void dump_stack(void); + #endif diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index bde8521d56bb..96ce80a3743b 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -1,6 +1,8 @@ ifeq ("$(origin O)", "command line") - OUTPUT := $(O)/ - COMMAND_O := O=$(O) + dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) + ABSOLUTE_O := $(shell cd $(O) ; pwd) + OUTPUT := $(ABSOLUTE_O)/ + COMMAND_O := O=$(ABSOLUTE_O) endif ifneq ($(OUTPUT),) |