diff options
Diffstat (limited to 'arch')
106 files changed, 2598 insertions, 1506 deletions
diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index 9a1db20e032a..63a481fbbed4 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -237,6 +237,7 @@ extern int _find_next_bit_be(const unsigned long *p, int size, int offset); #if __LINUX_ARM_ARCH__ < 5 #include <asm-generic/bitops/ffz.h> +#include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/__ffs.h> #include <asm-generic/bitops/fls.h> #include <asm-generic/bitops/ffs.h> @@ -277,16 +278,19 @@ static inline int constant_fls(int x) * the clz instruction for much better code efficiency. */ -#define __fls(x) \ - ( __builtin_constant_p(x) ? constant_fls(x) : \ - ({ int __r; asm("clz\t%0, %1" : "=r"(__r) : "r"(x) : "cc"); 32-__r; }) ) - -/* Implement fls() in C so that 64-bit args are suitably truncated */ static inline int fls(int x) { - return __fls(x); + int ret; + + if (__builtin_constant_p(x)) + return constant_fls(x); + + asm("clz\t%0, %1" : "=r" (ret) : "r" (x) : "cc"); + ret = 32 - ret; + return ret; } +#define __fls(x) (fls(x) - 1) #define ffs(x) ({ unsigned long __t = (x); fls(__t & -__t); }) #define __ffs(x) (ffs(x) - 1) #define ffz(x) __ffs( ~(x) ) diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 517a4d6ffc74..6ff33790f47b 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -23,7 +23,7 @@ #include <asm/types.h> #ifdef __KERNEL__ -#define STACK_TOP ((current->personality == PER_LINUX_32BIT) ? \ +#define STACK_TOP ((current->personality & ADDR_LIMIT_32BIT) ? \ TASK_SIZE : TASK_SIZE_26) #define STACK_TOP_MAX TASK_SIZE #endif diff --git a/arch/arm/mach-omap1/io.c b/arch/arm/mach-omap1/io.c index b3bd8ca85118..4c3e582f3d3c 100644 --- a/arch/arm/mach-omap1/io.c +++ b/arch/arm/mach-omap1/io.c @@ -128,7 +128,7 @@ void __init omap1_map_common_io(void) * Common low-level hardware init for omap1. This should only get called from * board specific init. */ -void __init omap1_init_common_hw() +void __init omap1_init_common_hw(void) { /* REVISIT: Refer to OMAP5910 Errata, Advisory SYS_1: "Timeout Abort * on a Posted Write in the TIPB Bridge". diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 133e65d166b3..2d5884ce0435 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -70,6 +70,10 @@ static unsigned long ai_dword; static unsigned long ai_multi; static int ai_usermode; +#define UM_WARN (1 << 0) +#define UM_FIXUP (1 << 1) +#define UM_SIGNAL (1 << 2) + #ifdef CONFIG_PROC_FS static const char *usermode_action[] = { "ignored", @@ -754,7 +758,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) user: ai_user += 1; - if (ai_usermode & 1) + if (ai_usermode & UM_WARN) printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx " "Address=0x%08lx FSR 0x%03x\n", current->comm, task_pid_nr(current), instrptr, @@ -762,10 +766,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) thumb_mode(regs) ? tinstr : instr, addr, fsr); - if (ai_usermode & 2) + if (ai_usermode & UM_FIXUP) goto fixup; - if (ai_usermode & 4) + if (ai_usermode & UM_SIGNAL) force_sig(SIGBUS, current); else set_cr(cr_no_alignment); @@ -796,6 +800,22 @@ static int __init alignment_init(void) res->write_proc = proc_alignment_write; #endif + /* + * ARMv6 and later CPUs can perform unaligned accesses for + * most single load and store instructions up to word size. + * LDM, STM, LDRD and STRD still need to be handled. + * + * Ignoring the alignment fault is not an option on these + * CPUs since we spin re-faulting the instruction without + * making any progress. + */ + if (cpu_architecture() >= CPU_ARCH_ARMv6 && (cr_alignment & CR_U)) { + cr_alignment &= ~CR_A; + cr_no_alignment &= ~CR_A; + set_cr(cr_alignment); + ai_usermode = UM_FIXUP; + } + hook_fault_code(1, do_alignment, SIGILL, "alignment exception"); hook_fault_code(3, do_alignment, SIGILL, "alignment exception"); diff --git a/arch/arm/plat-omap/include/mach/omapfb.h b/arch/arm/plat-omap/include/mach/omapfb.h index ec67fb428607..7b74d1255e0b 100644 --- a/arch/arm/plat-omap/include/mach/omapfb.h +++ b/arch/arm/plat-omap/include/mach/omapfb.h @@ -353,8 +353,8 @@ struct omapfb_device { u32 pseudo_palette[17]; struct lcd_panel *panel; /* LCD panel */ - struct lcd_ctrl *ctrl; /* LCD controller */ - struct lcd_ctrl *int_ctrl; /* internal LCD ctrl */ + const struct lcd_ctrl *ctrl; /* LCD controller */ + const struct lcd_ctrl *int_ctrl; /* internal LCD ctrl */ struct lcd_ctrl_extif *ext_if; /* LCD ctrl external interface */ struct device *dev; diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c index 9f9a921829c0..dcd9d16da2e9 100644 --- a/arch/arm/plat-omap/sram.c +++ b/arch/arm/plat-omap/sram.c @@ -255,7 +255,7 @@ void omap_sram_reprogram_clock(u32 dpllctl, u32 ckctl) if (!_omap_sram_reprogram_clock) omap_sram_error(); - return _omap_sram_reprogram_clock(dpllctl, ckctl); + _omap_sram_reprogram_clock(dpllctl, ckctl); } int __init omap1_sram_init(void) @@ -282,8 +282,8 @@ void omap2_sram_ddr_init(u32 *slow_dll_ctrl, u32 fast_dll_ctrl, if (!_omap2_sram_ddr_init) omap_sram_error(); - return _omap2_sram_ddr_init(slow_dll_ctrl, fast_dll_ctrl, - base_cs, force_unlock); + _omap2_sram_ddr_init(slow_dll_ctrl, fast_dll_ctrl, + base_cs, force_unlock); } static void (*_omap2_sram_reprogram_sdrc)(u32 perf_level, u32 dll_val, @@ -294,7 +294,7 @@ void omap2_sram_reprogram_sdrc(u32 perf_level, u32 dll_val, u32 mem_type) if (!_omap2_sram_reprogram_sdrc) omap_sram_error(); - return _omap2_sram_reprogram_sdrc(perf_level, dll_val, mem_type); + _omap2_sram_reprogram_sdrc(perf_level, dll_val, mem_type); } static u32 (*_omap2_set_prcm)(u32 dpll_ctrl_val, u32 sdrc_rfr_val, int bypass); diff --git a/arch/arm/plat-orion/pcie.c b/arch/arm/plat-orion/pcie.c index 883902fead89..d41d41d78ad9 100644 --- a/arch/arm/plat-orion/pcie.c +++ b/arch/arm/plat-orion/pcie.c @@ -35,7 +35,7 @@ #define PCIE_CONF_REG(r) ((((r) & 0xf00) << 16) | ((r) & 0xfc)) #define PCIE_CONF_BUS(b) (((b) & 0xff) << 16) #define PCIE_CONF_DEV(d) (((d) & 0x1f) << 11) -#define PCIE_CONF_FUNC(f) (((f) & 0x3) << 8) +#define PCIE_CONF_FUNC(f) (((f) & 0x7) << 8) #define PCIE_CONF_DATA_OFF 0x18fc #define PCIE_MASK_OFF 0x1910 #define PCIE_CTRL_OFF 0x1a00 diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 6bd91ed7cd03..7fa8f615ba6e 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -99,7 +99,7 @@ config GENERIC_IOMAP bool default y -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig index e05f9e1d3faa..27eb67604c53 100644 --- a/arch/ia64/configs/generic_defconfig +++ b/arch/ia64/configs/generic_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.27-rc1 -# Mon Aug 4 15:38:01 2008 +# Linux kernel version: 2.6.28-rc7 +# Mon Dec 8 08:12:07 2008 # CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" @@ -26,6 +26,7 @@ CONFIG_LOG_BUF_SHIFT=20 CONFIG_CGROUPS=y # CONFIG_CGROUP_DEBUG is not set # CONFIG_CGROUP_NS is not set +# CONFIG_CGROUP_FREEZER is not set # CONFIG_CGROUP_DEVICE is not set CONFIG_CPUSETS=y # CONFIG_GROUP_SCHED is not set @@ -46,7 +47,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_SYSCTL=y # CONFIG_EMBEDDED is not set CONFIG_SYSCTL_SYSCALL=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y # CONFIG_KALLSYMS_EXTRA_PASS is not set @@ -63,7 +63,9 @@ CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y +CONFIG_AIO=y CONFIG_VM_EVENT_COUNTERS=y +CONFIG_PCI_QUIRKS=y CONFIG_SLUB_DEBUG=y # CONFIG_SLAB is not set CONFIG_SLUB=y @@ -72,15 +74,11 @@ CONFIG_SLUB=y # CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set -# CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is not set -# CONFIG_HAVE_IOREMAP_PROT is not set CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y -# CONFIG_HAVE_ARCH_TRACEHOOK is not set +CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_ATTRS=y CONFIG_USE_GENERIC_SMP_HELPERS=y -# CONFIG_HAVE_CLK is not set -CONFIG_PROC_PAGE_MONITOR=y # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -113,6 +111,7 @@ CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="anticipatory" CONFIG_CLASSIC_RCU=y +# CONFIG_FREEZER is not set # # Processor type and features @@ -125,8 +124,6 @@ CONFIG_MMU=y CONFIG_SWIOTLB=y CONFIG_IOMMU_HELPER=y CONFIG_RWSEM_XCHGADD_ALGORITHM=y -# CONFIG_ARCH_HAS_ILOG2_U32 is not set -# CONFIG_ARCH_HAS_ILOG2_U64 is not set CONFIG_HUGETLB_PAGE_SIZE_VARIABLE=y CONFIG_GENERIC_FIND_NEXT_BIT=y CONFIG_GENERIC_CALIBRATE_DELAY=y @@ -139,13 +136,16 @@ CONFIG_GENERIC_IOMAP=y CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y CONFIG_IA64_UNCACHED_ALLOCATOR=y CONFIG_AUDIT_ARCH=y +# CONFIG_PARAVIRT_GUEST is not set CONFIG_IA64_GENERIC=y # CONFIG_IA64_DIG is not set +# CONFIG_IA64_DIG_VTD is not set # CONFIG_IA64_HP_ZX1 is not set # CONFIG_IA64_HP_ZX1_SWIOTLB is not set # CONFIG_IA64_SGI_SN2 is not set # CONFIG_IA64_SGI_UV is not set # CONFIG_IA64_HP_SIM is not set +# CONFIG_IA64_XEN_GUEST is not set # CONFIG_ITANIUM is not set CONFIG_MCKINLEY=y # CONFIG_IA64_PAGE_SIZE_4KB is not set @@ -182,16 +182,17 @@ CONFIG_DISCONTIGMEM_MANUAL=y CONFIG_DISCONTIGMEM=y CONFIG_FLAT_NODE_MEM_MAP=y CONFIG_NEED_MULTIPLE_NODES=y -# CONFIG_SPARSEMEM_STATIC is not set CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MIGRATION=y CONFIG_RESOURCES_64BIT=y +CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_ZONE_DMA_FLAG=1 CONFIG_BOUNCE=y CONFIG_NR_QUICK=1 CONFIG_VIRT_TO_BUS=y +CONFIG_UNEVICTABLE_LRU=y CONFIG_MMU_NOTIFIER=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_DISCONTIGMEM_ENABLE=y @@ -231,12 +232,12 @@ CONFIG_EFI_VARS=y CONFIG_EFI_PCDP=y CONFIG_DMIID=y CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_HAVE_AOUT is not set CONFIG_BINFMT_MISC=m -# CONFIG_DMAR is not set - # -# Power management and ACPI +# Power management and ACPI options # CONFIG_PM=y # CONFIG_PM_DEBUG is not set @@ -248,7 +249,6 @@ CONFIG_ACPI_PROC_EVENT=y CONFIG_ACPI_BUTTON=m CONFIG_ACPI_FAN=m CONFIG_ACPI_DOCK=y -# CONFIG_ACPI_BAY is not set CONFIG_ACPI_PROCESSOR=m CONFIG_ACPI_HOTPLUG_CPU=y CONFIG_ACPI_THERMAL=m @@ -256,9 +256,7 @@ CONFIG_ACPI_NUMA=y # CONFIG_ACPI_CUSTOM_DSDT is not set CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set -CONFIG_ACPI_EC=y # CONFIG_ACPI_PCI_SLOT is not set -CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y CONFIG_ACPI_CONTAINER=m @@ -275,7 +273,7 @@ CONFIG_PCI_DOMAINS=y CONFIG_PCI_SYSCALL=y # CONFIG_PCIEPORTBUS is not set CONFIG_ARCH_SUPPORTS_MSI=y -# CONFIG_PCI_MSI is not set +CONFIG_PCI_MSI=y CONFIG_PCI_LEGACY=y # CONFIG_PCI_DEBUG is not set CONFIG_HOTPLUG_PCI=m @@ -286,6 +284,7 @@ CONFIG_HOTPLUG_PCI_ACPI=m # CONFIG_HOTPLUG_PCI_SHPC is not set # CONFIG_HOTPLUG_PCI_SGI is not set # CONFIG_PCCARD is not set +CONFIG_DMAR=y CONFIG_NET=y # @@ -333,6 +332,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set # CONFIG_DECNET is not set # CONFIG_LLC2 is not set @@ -353,11 +353,10 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set - -# -# Wireless -# +# CONFIG_PHONET is not set +CONFIG_WIRELESS=y # CONFIG_CFG80211 is not set +CONFIG_WIRELESS_OLD_REGULATORY=y # CONFIG_WIRELESS_EXT is not set # CONFIG_MAC80211 is not set # CONFIG_IEEE80211 is not set @@ -385,7 +384,7 @@ CONFIG_PROC_EVENTS=y # CONFIG_MTD is not set # CONFIG_PARPORT is not set CONFIG_PNP=y -# CONFIG_PNP_DEBUG is not set +# CONFIG_PNP_DEBUG_MESSAGES is not set # # Protocols @@ -419,10 +418,9 @@ CONFIG_SGI_XP=m # CONFIG_HP_ILO is not set CONFIG_SGI_GRU=m # CONFIG_SGI_GRU_DEBUG is not set +# CONFIG_C2PORT is not set CONFIG_HAVE_IDE=y CONFIG_IDE=y -CONFIG_IDE_MAX_HWIFS=4 -CONFIG_BLK_DEV_IDE=y # # Please see Documentation/ide/ide.txt for help/info on IDE drives @@ -430,12 +428,12 @@ CONFIG_BLK_DEV_IDE=y CONFIG_IDE_TIMINGS=y CONFIG_IDE_ATAPI=y # CONFIG_BLK_DEV_IDE_SATA is not set -CONFIG_BLK_DEV_IDEDISK=y -# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_IDE_GD=y +CONFIG_IDE_GD_ATA=y +# CONFIG_IDE_GD_ATAPI is not set CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y # CONFIG_BLK_DEV_IDETAPE is not set -CONFIG_BLK_DEV_IDEFLOPPY=y CONFIG_BLK_DEV_IDESCSI=m # CONFIG_BLK_DEV_IDEACPI is not set # CONFIG_IDE_TASK_IOCTL is not set @@ -705,6 +703,9 @@ CONFIG_TULIP=m # CONFIG_IBM_NEW_EMAC_RGMII is not set # CONFIG_IBM_NEW_EMAC_TAH is not set # CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set CONFIG_NET_PCI=y # CONFIG_PCNET32 is not set # CONFIG_AMD8111_ETH is not set @@ -725,11 +726,11 @@ CONFIG_E100=m # CONFIG_TLAN is not set # CONFIG_VIA_RHINE is not set # CONFIG_SC92031 is not set +# CONFIG_ATL2 is not set CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set CONFIG_E1000=y -# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set # CONFIG_E1000E is not set # CONFIG_IP1000 is not set CONFIG_IGB=y @@ -747,18 +748,22 @@ CONFIG_TIGON3=y # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set # CONFIG_ATL1E is not set +# CONFIG_JME is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set # CONFIG_CHELSIO_T3 is not set +# CONFIG_ENIC is not set # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set # CONFIG_MYRI10GE is not set # CONFIG_NETXEN_NIC is not set # CONFIG_NIU is not set +# CONFIG_MLX4_EN is not set # CONFIG_MLX4_CORE is not set # CONFIG_TEHUTI is not set # CONFIG_BNX2X is not set +# CONFIG_QLGE is not set # CONFIG_SFC is not set # CONFIG_TR is not set @@ -826,9 +831,11 @@ CONFIG_MOUSE_PS2_LOGIPS2PP=y CONFIG_MOUSE_PS2_SYNAPTICS=y CONFIG_MOUSE_PS2_LIFEBOOK=y CONFIG_MOUSE_PS2_TRACKPOINT=y +# CONFIG_MOUSE_PS2_ELANTECH is not set # CONFIG_MOUSE_PS2_TOUCHKIT is not set # CONFIG_MOUSE_SERIAL is not set # CONFIG_MOUSE_APPLETOUCH is not set +# CONFIG_MOUSE_BCM5974 is not set # CONFIG_MOUSE_VSXXXAA is not set # CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TABLET is not set @@ -942,15 +949,16 @@ CONFIG_HWMON=y # CONFIG_SENSORS_VT8231 is not set # CONFIG_SENSORS_W83627HF is not set # CONFIG_SENSORS_W83627EHF is not set +# CONFIG_SENSORS_LIS3LV02D is not set # CONFIG_HWMON_DEBUG_CHIP is not set CONFIG_THERMAL=m # CONFIG_THERMAL_HWMON is not set # CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y # # Sonics Silicon Backplane # -CONFIG_SSB_POSSIBLE=y # CONFIG_SSB is not set # @@ -959,6 +967,8 @@ CONFIG_SSB_POSSIBLE=y # CONFIG_MFD_CORE is not set # CONFIG_MFD_SM501 is not set # CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_REGULATOR is not set # # Multimedia devices @@ -1009,6 +1019,7 @@ CONFIG_VGA_CONSOLE=y # CONFIG_VGACON_SOFT_SCROLLBACK is not set CONFIG_DUMMY_CONSOLE=y CONFIG_SOUND=m +CONFIG_SOUND_OSS_CORE=y CONFIG_SND=m CONFIG_SND_TIMER=m CONFIG_SND_PCM=m @@ -1113,8 +1124,7 @@ CONFIG_HID=y # USB Input Devices # CONFIG_USB_HID=m -# CONFIG_USB_HIDINPUT_POWERBOOK is not set -# CONFIG_HID_FF is not set +# CONFIG_HID_PID is not set # CONFIG_USB_HIDDEV is not set # @@ -1122,6 +1132,34 @@ CONFIG_USB_HID=m # # CONFIG_USB_KBD is not set # CONFIG_USB_MOUSE is not set + +# +# Special HID drivers +# +CONFIG_HID_COMPAT=y +CONFIG_HID_A4TECH=m +CONFIG_HID_APPLE=m +CONFIG_HID_BELKIN=m +CONFIG_HID_BRIGHT=m +CONFIG_HID_CHERRY=m +CONFIG_HID_CHICONY=m +CONFIG_HID_CYPRESS=m +CONFIG_HID_DELL=m +CONFIG_HID_EZKEY=m +CONFIG_HID_GYRATION=m +CONFIG_HID_LOGITECH=m +# CONFIG_LOGITECH_FF is not set +# CONFIG_LOGIRUMBLEPAD2_FF is not set +CONFIG_HID_MICROSOFT=m +CONFIG_HID_MONTEREY=m +CONFIG_HID_PANTHERLORD=m +# CONFIG_PANTHERLORD_FF is not set +CONFIG_HID_PETALYNX=m +CONFIG_HID_SAMSUNG=m +CONFIG_HID_SONY=m +CONFIG_HID_SUNPLUS=m +# CONFIG_THRUSTMASTER_FF is not set +# CONFIG_ZEROPLUS_FF is not set CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y @@ -1138,6 +1176,9 @@ CONFIG_USB_DEVICE_CLASS=y # CONFIG_USB_DYNAMIC_MINORS is not set # CONFIG_USB_SUSPEND is not set # CONFIG_USB_OTG is not set +CONFIG_USB_MON=y +# CONFIG_USB_WUSB is not set +# CONFIG_USB_WUSB_CBAF is not set # # USB Host Controller Drivers @@ -1155,6 +1196,12 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_UHCI_HCD=m # CONFIG_USB_SL811_HCD is not set # CONFIG_USB_R8A66597_HCD is not set +# CONFIG_USB_WHCI_HCD is not set +# CONFIG_USB_HWA_HCD is not set + +# +# Enable Host or Gadget support to see Inventra options +# # # USB Device Class drivers @@ -1162,13 +1209,14 @@ CONFIG_USB_UHCI_HCD=m # CONFIG_USB_ACM is not set # CONFIG_USB_PRINTER is not set # CONFIG_USB_WDM is not set +# CONFIG_USB_TMC is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; # # -# may also be needed; see USB_STORAGE Help for more information +# see USB_STORAGE Help for more information # CONFIG_USB_STORAGE=m # CONFIG_USB_STORAGE_DEBUG is not set @@ -1191,7 +1239,6 @@ CONFIG_USB_STORAGE=m # # CONFIG_USB_MDC800 is not set # CONFIG_USB_MICROTEK is not set -CONFIG_USB_MON=y # # USB port drivers @@ -1204,7 +1251,7 @@ CONFIG_USB_MON=y # CONFIG_USB_EMI62 is not set # CONFIG_USB_EMI26 is not set # CONFIG_USB_ADUTUX is not set -# CONFIG_USB_AUERSWALD is not set +# CONFIG_USB_SEVSEG is not set # CONFIG_USB_RIO500 is not set # CONFIG_USB_LEGOTOWER is not set # CONFIG_USB_LCD is not set @@ -1222,7 +1269,9 @@ CONFIG_USB_MON=y # CONFIG_USB_IOWARRIOR is not set # CONFIG_USB_TEST is not set # CONFIG_USB_ISIGHTFW is not set +# CONFIG_USB_VST is not set # CONFIG_USB_GADGET is not set +# CONFIG_UWB is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set @@ -1246,6 +1295,15 @@ CONFIG_INFINIBAND_IPOIB_DEBUG=y # CONFIG_RTC_CLASS is not set # CONFIG_DMADEVICES is not set # CONFIG_UIO is not set +# CONFIG_STAGING is not set +CONFIG_STAGING_EXCLUDE_BUILD=y + +# +# HP Simulator drivers +# +# CONFIG_HP_SIMETH is not set +# CONFIG_HP_SIMSERIAL is not set +# CONFIG_HP_SIMSCSI is not set CONFIG_MSPEC=m # @@ -1260,7 +1318,7 @@ CONFIG_EXT3_FS=y CONFIG_EXT3_FS_XATTR=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -# CONFIG_EXT4DEV_FS is not set +# CONFIG_EXT4_FS is not set CONFIG_JBD=y CONFIG_FS_MBCACHE=y CONFIG_REISERFS_FS=y @@ -1271,6 +1329,7 @@ CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y # CONFIG_JFS_FS is not set CONFIG_FS_POSIX_ACL=y +CONFIG_FILE_LOCKING=y CONFIG_XFS_FS=y # CONFIG_XFS_QUOTA is not set # CONFIG_XFS_POSIX_ACL is not set @@ -1282,8 +1341,8 @@ CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y # CONFIG_QUOTA is not set -CONFIG_AUTOFS_FS=y -CONFIG_AUTOFS4_FS=y +CONFIG_AUTOFS_FS=m +CONFIG_AUTOFS4_FS=m # CONFIG_FUSE_FS is not set # @@ -1314,6 +1373,7 @@ CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_VMCORE=y CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y CONFIG_SYSFS=y CONFIG_TMPFS=y # CONFIG_TMPFS_POSIX_ACL is not set @@ -1356,6 +1416,7 @@ CONFIG_NFS_COMMON=y CONFIG_SUNRPC=m CONFIG_SUNRPC_GSS=m CONFIG_SUNRPC_XPRT_RDMA=m +# CONFIG_SUNRPC_REGISTER_V4 is not set CONFIG_RPCSEC_GSS_KRB5=m # CONFIG_RPCSEC_GSS_SPKM3 is not set CONFIG_SMB_FS=m @@ -1433,38 +1494,6 @@ CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=m # CONFIG_DLM is not set -CONFIG_HAVE_KVM=y -CONFIG_VIRTUALIZATION=y -# CONFIG_KVM is not set - -# -# Library routines -# -CONFIG_BITREVERSE=y -# CONFIG_GENERIC_FIND_FIRST_BIT is not set -# CONFIG_CRC_CCITT is not set -# CONFIG_CRC16 is not set -CONFIG_CRC_T10DIF=y -CONFIG_CRC_ITU_T=m -CONFIG_CRC32=y -# CONFIG_CRC7 is not set -# CONFIG_LIBCRC32C is not set -CONFIG_GENERIC_ALLOCATOR=y -CONFIG_PLIST=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT=y -CONFIG_HAS_DMA=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_GENERIC_IRQ_PROBE=y -CONFIG_GENERIC_PENDING_IRQ=y -CONFIG_IRQ_PER_CPU=y - -# -# HP Simulator drivers -# -# CONFIG_HP_SIMETH is not set -# CONFIG_HP_SIMSERIAL is not set -# CONFIG_HP_SIMSCSI is not set # # Kernel hacking @@ -1503,8 +1532,19 @@ CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_SG is not set # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_FAULT_INJECTION is not set +CONFIG_SYSCTL_SYSCALL_CHECK=y + +# +# Tracers +# +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# CONFIG_DYNAMIC_PRINTK_DEBUG is not set # CONFIG_SAMPLES is not set CONFIG_IA64_GRANULE_16MB=y # CONFIG_IA64_GRANULE_64MB is not set @@ -1519,14 +1559,19 @@ CONFIG_SYSVIPC_COMPAT=y # # CONFIG_KEYS is not set # CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set # CONFIG_SECURITY_FILE_CAPABILITIES is not set CONFIG_CRYPTO=y # # Crypto core or helper # +# CONFIG_CRYPTO_FIPS is not set CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_AEAD=m CONFIG_CRYPTO_BLKCIPHER=m +CONFIG_CRYPTO_HASH=m +CONFIG_CRYPTO_RNG=m CONFIG_CRYPTO_MANAGER=m # CONFIG_CRYPTO_GF128MUL is not set # CONFIG_CRYPTO_NULL is not set @@ -1599,5 +1644,36 @@ CONFIG_CRYPTO_DES=m # # CONFIG_CRYPTO_DEFLATE is not set # CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +CONFIG_HAVE_KVM=y +CONFIG_VIRTUALIZATION=y +# CONFIG_KVM is not set +# CONFIG_VIRTIO_PCI is not set +# CONFIG_VIRTIO_BALLOON is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +CONFIG_CRC_T10DIF=y +CONFIG_CRC_ITU_T=m +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_GENERIC_ALLOCATOR=y +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_GENERIC_PENDING_IRQ=y +CONFIG_IRQ_PER_CPU=y diff --git a/arch/ia64/include/asm/paravirt_privop.h b/arch/ia64/include/asm/paravirt_privop.h index 0b597424fcfc..33c8e55f5775 100644 --- a/arch/ia64/include/asm/paravirt_privop.h +++ b/arch/ia64/include/asm/paravirt_privop.h @@ -83,7 +83,6 @@ extern unsigned long ia64_native_getreg_func(int regnum); #define paravirt_getreg(reg) \ ({ \ unsigned long res; \ - BUILD_BUG_ON(!__builtin_constant_p(reg)); \ if ((reg) == _IA64_REG_IP) \ res = ia64_native_getreg(_IA64_REG_IP); \ else \ diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 35bcb641c9e5..a3cc9f65f954 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -55,7 +55,6 @@ void build_cpu_to_node_map(void); #define SD_CPU_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ @@ -80,7 +79,6 @@ void build_cpu_to_node_map(void); /* sched_domains SD_NODE_INIT for IA64 NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 26228e2d01ae..c75b914f2d6b 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -53,10 +53,12 @@ int __ref arch_register_cpu(int num) } EXPORT_SYMBOL(arch_register_cpu); -void arch_unregister_cpu(int num) +void __ref arch_unregister_cpu(int num) { unregister_cpu(&sysfs_cpus[num].cpu); +#ifdef CONFIG_ACPI unmap_cpu_from_node(num, cpu_to_node(num)); +#endif } EXPORT_SYMBOL(arch_unregister_cpu); #else diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 96c31b4180c3..0c66dbdd1d72 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2000-2007 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2008 Silicon Graphics, Inc. All Rights Reserved. */ #include <linux/irq.h> @@ -375,6 +375,7 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) int cpu = nasid_slice_to_cpuid(nasid, slice); #ifdef CONFIG_SMP int cpuphys; + irq_desc_t *desc; #endif pci_dev_get(pci_dev); @@ -391,6 +392,12 @@ void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info) #ifdef CONFIG_SMP cpuphys = cpu_physical_id(cpu); set_irq_affinity_info(sn_irq_info->irq_irq, cpuphys, 0); + desc = irq_to_desc(sn_irq_info->irq_irq); + /* + * Affinity was set by the PROM, prevent it from + * being reset by the request_irq() path. + */ + desc->status |= IRQ_AFFINITY_SET; #endif } diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index bb1d24929640..02c5b8a9fb60 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -200,7 +200,7 @@ static int __cpuinitdata shub_1_1_found; * Set flag for enabling shub specific wars */ -static inline int __init is_shub_1_1(int nasid) +static inline int __cpuinit is_shub_1_1(int nasid) { unsigned long id; int rev; @@ -212,7 +212,7 @@ static inline int __init is_shub_1_1(int nasid) return rev <= 2; } -static void __init sn_check_for_wars(void) +static void __cpuinit sn_check_for_wars(void) { int cnode; @@ -512,7 +512,6 @@ static void __init sn_init_pdas(char **cmdline_p) for_each_online_node(cnode) { nodepdaindr[cnode] = alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t)); - memset(nodepdaindr[cnode], 0, sizeof(nodepda_t)); memset(nodepdaindr[cnode]->phys_cpuid, -1, sizeof(nodepdaindr[cnode]->phys_cpuid)); spin_lock_init(&nodepdaindr[cnode]->ptc_lock); @@ -521,11 +520,9 @@ static void __init sn_init_pdas(char **cmdline_p) /* * Allocate & initialize nodepda for TIOs. For now, put them on node 0. */ - for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) { + for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) nodepdaindr[cnode] = alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t)); - memset(nodepdaindr[cnode], 0, sizeof(nodepda_t)); - } /* * Now copy the array of nodepda pointers to each nodepda. diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index dbaed4a63815..29047d5c259a 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -273,7 +273,7 @@ config GENERIC_CALIBRATE_DELAY bool default y -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f4af967a6b30..a5255e7c79e0 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -653,7 +653,7 @@ config GENERIC_CMOS_UPDATE bool default y -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index 7785bec732f2..1fb959f98982 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h @@ -37,7 +37,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; /* sched_domains SD_NODE_INIT for SGI IP27 machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index 5510c53b7feb..053e4634acee 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -79,6 +79,11 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) /* We don't do dynamic PCI IRQ allocation */ } +#define HAVE_PCI_MMAP + +extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine); + /* * Dynamic DMA mapping stuff. * MIPS has everything mapped statically. diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c index a377e9d2d029..62cae740e250 100644 --- a/arch/mips/pci/pci.c +++ b/arch/mips/pci/pci.c @@ -354,6 +354,30 @@ EXPORT_SYMBOL(PCIBIOS_MIN_IO); EXPORT_SYMBOL(PCIBIOS_MIN_MEM); #endif +int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine) +{ + unsigned long prot; + + /* + * I/O space can be accessed via normal processor loads and stores on + * this platform but for now we elect not to do this and portable + * drivers should not do this anyway. + */ + if (mmap_state == pci_mmap_io) + return -EINVAL; + + /* + * Ignore write-combine; for now only return uncached mappings. + */ + prot = pgprot_val(vma->vm_page_prot); + prot = (prot & ~_CACHE_MASK) | _CACHE_UNCACHED; + vma->vm_page_prot = __pgprot(prot); + + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, vma->vm_page_prot); +} + char * (*pcibios_plat_setup)(char *str) __devinitdata; char *__devinit pcibios_setup(char *str) diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index b7cbb1487af4..62fba8aa9b6e 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -180,6 +180,7 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) + __cli mov (TI_preempt_count,a2),d0 # non-zero preempt_count ? cmp 0,d0 bne restore_all @@ -190,7 +191,7 @@ need_resched: mov (REG_EPSW,fp),d0 and EPSW_IM,d0 cmp EPSW_IM_7,d0 # interrupts off (exception path) ? - beq restore_all + bne restore_all call preempt_schedule_irq[],0 jmp need_resched #endif diff --git a/arch/mn10300/kernel/mn10300-serial.c b/arch/mn10300/kernel/mn10300-serial.c index aa07d0cd1905..59b9c4bf9583 100644 --- a/arch/mn10300/kernel/mn10300-serial.c +++ b/arch/mn10300/kernel/mn10300-serial.c @@ -566,6 +566,11 @@ static void mn10300_serial_transmit_interrupt(struct mn10300_serial_port *port) { _enter("%s", port->name); + if (!port->uart.info || !port->uart.info->port.tty) { + mn10300_serial_dis_tx_intr(port); + return; + } + if (uart_tx_stopped(&port->uart) || uart_circ_empty(&port->uart.info->xmit)) mn10300_serial_dis_tx_intr(port); diff --git a/arch/mn10300/kernel/setup.c b/arch/mn10300/kernel/setup.c index 017121ce896f..e1d88ab51008 100644 --- a/arch/mn10300/kernel/setup.c +++ b/arch/mn10300/kernel/setup.c @@ -161,7 +161,7 @@ void __init setup_arch(char **cmdline_p) reserve the page it is occupying. */ if (CONFIG_INTERRUPT_VECTOR_BASE >= CONFIG_KERNEL_RAM_BASE_ADDRESS && CONFIG_INTERRUPT_VECTOR_BASE < memory_end) - reserve_bootmem(CONFIG_INTERRUPT_VECTOR_BASE, 1, + reserve_bootmem(CONFIG_INTERRUPT_VECTOR_BASE, PAGE_SIZE, BOOTMEM_DEFAULT); reserve_bootmem(PAGE_ALIGN(PFN_PHYS(free_pfn)), bootmap_size, diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index a3e80f444f55..b8259668f7dc 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -11,6 +11,7 @@ #define __VMLINUX_LDS__ #include <asm-generic/vmlinux.lds.h> #include <asm/thread_info.h> +#include <asm/page.h> OUTPUT_FORMAT("elf32-am33lin", "elf32-am33lin", "elf32-am33lin") OUTPUT_ARCH(mn10300) @@ -55,13 +56,13 @@ SECTIONS CONSTRUCTORS } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __nosave_begin = .; .data_nosave : { *(.data.nosave) } - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __nosave_end = .; - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); .data.page_aligned : { *(.data.idt) } . = ALIGN(32); @@ -78,7 +79,7 @@ SECTIONS .data.init_task : { *(.data.init_task) } /* might get freed after init */ - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { __smp_locks = .; *(.smp_locks) @@ -86,7 +87,7 @@ SECTIONS } /* will be freed after init */ - . = ALIGN(4096); /* Init code and data */ + . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; .init.text : { _sinittext = .; @@ -120,17 +121,14 @@ SECTIONS .exit.data : { *(.exit.data) } #ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); __initramfs_start = .; .init.ramfs : { *(.init.ramfs) } __initramfs_end = .; #endif - . = ALIGN(32); - __per_cpu_start = .; - .data.percpu : { *(.data.percpu) } - __per_cpu_end = .; - . = ALIGN(4096); + PERCPU(32) + . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ @@ -145,7 +143,7 @@ SECTIONS _end = . ; /* This is where the kernel creates the early boot page tables */ - . = ALIGN(4096); + . = ALIGN(PAGE_SIZE); pg0 = .; /* Sections to be discarded */ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 525c13a4de93..adb23ea1c1ef 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -141,7 +141,7 @@ config GENERIC_NVRAM bool default y if PPC32 -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER bool default y diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index b298f7a631e6..e5f2ae8362f7 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -7,7 +7,19 @@ #ifndef __ASSEMBLY__ extern void _mcount(void); -#endif + +#ifdef CONFIG_DYNAMIC_FTRACE +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + /* reloction of mcount call site is the same as the address */ + return addr; +} + +struct dyn_arch_ftrace { + struct module *mod; +}; +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* __ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index e5f14b13ccf0..08454880a2c0 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -34,11 +34,19 @@ struct mod_arch_specific { #ifdef __powerpc64__ unsigned int stubs_section; /* Index of stubs section in module */ unsigned int toc_section; /* What section is the TOC? */ -#else +#ifdef CONFIG_DYNAMIC_FTRACE + unsigned long toc; + unsigned long tramp; +#endif + +#else /* powerpc64 */ /* Indices of PLT sections within module. */ unsigned int core_plt_section; unsigned int init_plt_section; +#ifdef CONFIG_DYNAMIC_FTRACE + unsigned long tramp; #endif +#endif /* powerpc64 */ /* List of BUG addresses, source line numbers and filenames */ struct list_head bug_list; @@ -68,6 +76,12 @@ struct mod_arch_specific { # endif /* MODULE */ #endif +#ifdef CONFIG_DYNAMIC_FTRACE +# ifdef MODULE + asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous"); +# endif /* MODULE */ +#endif + struct exception_table_entry; void sort_ex_table(struct exception_table_entry *start, diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index c32da6f97999..373fca394a54 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -48,7 +48,6 @@ static inline int pcibus_to_node(struct pci_bus *bus) /* sched_domains SD_NODE_INIT for PPC64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 92673b43858d..d17edb4a2f9d 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -17,6 +17,7 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog +CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog ifdef CONFIG_DYNAMIC_FTRACE # dynamic ftrace setup. diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S index 31c18b52affb..10b4ab1008af 100644 --- a/arch/powerpc/kernel/cpu_setup_44x.S +++ b/arch/powerpc/kernel/cpu_setup_44x.S @@ -40,6 +40,7 @@ _GLOBAL(__setup_cpu_460gt) mtlr r4 blr +_GLOBAL(__setup_cpu_440x5) _GLOBAL(__setup_cpu_440gx) _GLOBAL(__setup_cpu_440spe) b __fixup_440A_mcheck diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index b1eb834bc0fc..7e8719504f39 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -39,6 +39,7 @@ extern void __setup_cpu_440epx(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_440x5(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec); @@ -1500,6 +1501,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_user_features = COMMON_USER_BOOKE, .icache_bsize = 32, .dcache_bsize = 32, + .cpu_setup = __setup_cpu_440x5, + .machine_check = machine_check_440A, .platform = "ppc440", }, { /* 460EX */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 7ecc0d1855c3..6f7eb7e00c79 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -1162,39 +1162,17 @@ machine_check_in_rtas: #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) - stwu r1,-48(r1) - stw r3, 12(r1) - stw r4, 16(r1) - stw r5, 20(r1) - stw r6, 24(r1) - mflr r3 - stw r7, 28(r1) - mfcr r5 - stw r8, 32(r1) - stw r9, 36(r1) - stw r10,40(r1) - stw r3, 44(r1) - stw r5, 8(r1) - subi r3, r3, MCOUNT_INSN_SIZE - .globl mcount_call -mcount_call: - bl ftrace_stub - nop - lwz r6, 8(r1) - lwz r0, 44(r1) - lwz r3, 12(r1) + /* + * It is required that _mcount on PPC32 must preserve the + * link register. But we have r0 to play with. We use r0 + * to push the return address back to the caller of mcount + * into the ctr register, restore the link register and + * then jump back using the ctr register. + */ + mflr r0 mtctr r0 - lwz r4, 16(r1) - mtcr r6 - lwz r5, 20(r1) - lwz r6, 24(r1) - lwz r0, 52(r1) - lwz r7, 28(r1) - lwz r8, 32(r1) + lwz r0, 4(r1) mtlr r0 - lwz r9, 36(r1) - lwz r10,40(r1) - addi r1, r1, 48 bctr _GLOBAL(ftrace_caller) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index e0bcf9354286..383ed6eb0085 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -894,18 +894,6 @@ _GLOBAL(enter_prom) #ifdef CONFIG_DYNAMIC_FTRACE _GLOBAL(mcount) _GLOBAL(_mcount) - /* Taken from output of objdump from lib64/glibc */ - mflr r3 - stdu r1, -112(r1) - std r3, 128(r1) - subi r3, r3, MCOUNT_INSN_SIZE - .globl mcount_call -mcount_call: - bl ftrace_stub - nop - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 blr _GLOBAL(ftrace_caller) diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index f4b006ed0ab1..5355244c99ff 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -9,22 +9,30 @@ #include <linux/spinlock.h> #include <linux/hardirq.h> +#include <linux/uaccess.h> +#include <linux/module.h> #include <linux/ftrace.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/list.h> #include <asm/cacheflush.h> +#include <asm/code-patching.h> #include <asm/ftrace.h> +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt , ...) do { } while (0) +#endif -static unsigned int ftrace_nop = 0x60000000; +static unsigned int ftrace_nop = PPC_NOP_INSTR; #ifdef CONFIG_PPC32 # define GET_ADDR(addr) addr #else /* PowerPC64's functions are data that points to the functions */ -# define GET_ADDR(addr) *(unsigned long *)addr +# define GET_ADDR(addr) (*(unsigned long *)addr) #endif @@ -33,12 +41,12 @@ static unsigned int ftrace_calc_offset(long ip, long addr) return (int)(addr - ip); } -unsigned char *ftrace_nop_replace(void) +static unsigned char *ftrace_nop_replace(void) { return (char *)&ftrace_nop; } -unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static unsigned int op; @@ -68,49 +76,422 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) # define _ASM_PTR " .long " #endif -int +static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { - unsigned replaced; - unsigned old = *(unsigned *)old_code; - unsigned new = *(unsigned *)new_code; - int faulted = 0; + unsigned char replaced[MCOUNT_INSN_SIZE]; /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting - * as well as code changing. + * as well as code changing. We do this by using the + * probe_kernel_* functions. * * No real locking needed, this code is run through - * kstop_machine. + * kstop_machine, or before SMP starts. */ - asm volatile ( - "1: lwz %1, 0(%2)\n" - " cmpw %1, %5\n" - " bne 2f\n" - " stwu %3, 0(%2)\n" - "2:\n" - ".section .fixup, \"ax\"\n" - "3: li %0, 1\n" - " b 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - _ASM_ALIGN "\n" - _ASM_PTR "1b, 3b\n" - ".previous" - : "=r"(faulted), "=r"(replaced) - : "r"(ip), "r"(new), - "0"(faulted), "r"(old) - : "memory"); - - if (replaced != old && replaced != new) - faulted = 2; - - if (!faulted) - flush_icache_range(ip, ip + 8); - - return faulted; + + /* read the text we want to modify */ + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* Make sure it is what we expect it to be */ + if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + + /* replace the text with the new text */ + if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) + return -EPERM; + + flush_icache_range(ip, ip + 8); + + return 0; +} + +/* + * Helper functions that are the same for both PPC64 and PPC32. + */ +static int test_24bit_addr(unsigned long ip, unsigned long addr) +{ + + /* use the create_branch to verify that this offset can be branched */ + return create_branch((unsigned int *)ip, addr, 0); +} + +static int is_bl_op(unsigned int op) +{ + return (op & 0xfc000003) == 0x48000001; +} + +static unsigned long find_bl_target(unsigned long ip, unsigned int op) +{ + static int offset; + + offset = (op & 0x03fffffc); + /* make it signed */ + if (offset & 0x02000000) + offset |= 0xfe000000; + + return ip + (long)offset; +} + +#ifdef CONFIG_PPC64 +static int +__ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned int op; + unsigned int jmp[5]; + unsigned long ptr; + unsigned long ip = rec->ip; + unsigned long tramp; + int offset; + + /* read where this goes */ + if (probe_kernel_read(&op, (void *)ip, sizeof(int))) + return -EFAULT; + + /* Make sure that that this is still a 24bit jump */ + if (!is_bl_op(op)) { + printk(KERN_ERR "Not expected bl: opcode is %x\n", op); + return -EINVAL; + } + + /* lets find where the pointer goes */ + tramp = find_bl_target(ip, op); + + /* + * On PPC64 the trampoline looks like: + * 0x3d, 0x82, 0x00, 0x00, addis r12,r2, <high> + * 0x39, 0x8c, 0x00, 0x00, addi r12,r12, <low> + * Where the bytes 2,3,6 and 7 make up the 32bit offset + * to the TOC that holds the pointer. + * to jump to. + * 0xf8, 0x41, 0x00, 0x28, std r2,40(r1) + * 0xe9, 0x6c, 0x00, 0x20, ld r11,32(r12) + * The actually address is 32 bytes from the offset + * into the TOC. + * 0xe8, 0x4c, 0x00, 0x28, ld r2,40(r12) + */ + + DEBUGP("ip:%lx jumps to %lx r2: %lx", ip, tramp, mod->arch.toc); + + /* Find where the trampoline jumps to */ + if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { + printk(KERN_ERR "Failed to read %lx\n", tramp); + return -EFAULT; + } + + DEBUGP(" %08x %08x", jmp[0], jmp[1]); + + /* verify that this is what we expect it to be */ + if (((jmp[0] & 0xffff0000) != 0x3d820000) || + ((jmp[1] & 0xffff0000) != 0x398c0000) || + (jmp[2] != 0xf8410028) || + (jmp[3] != 0xe96c0020) || + (jmp[4] != 0xe84c0028)) { + printk(KERN_ERR "Not a trampoline\n"); + return -EINVAL; + } + + offset = (unsigned)((unsigned short)jmp[0]) << 16 | + (unsigned)((unsigned short)jmp[1]); + + DEBUGP(" %x ", offset); + + /* get the address this jumps too */ + tramp = mod->arch.toc + offset + 32; + DEBUGP("toc: %lx", tramp); + + if (probe_kernel_read(jmp, (void *)tramp, 8)) { + printk(KERN_ERR "Failed to read %lx\n", tramp); + return -EFAULT; + } + + DEBUGP(" %08x %08x\n", jmp[0], jmp[1]); + + ptr = ((unsigned long)jmp[0] << 32) + jmp[1]; + + /* This should match what was called */ + if (ptr != GET_ADDR(addr)) { + printk(KERN_ERR "addr does not match %lx\n", ptr); + return -EINVAL; + } + + /* + * We want to nop the line, but the next line is + * 0xe8, 0x41, 0x00, 0x28 ld r2,40(r1) + * This needs to be turned to a nop too. + */ + if (probe_kernel_read(&op, (void *)(ip+4), MCOUNT_INSN_SIZE)) + return -EFAULT; + + if (op != 0xe8410028) { + printk(KERN_ERR "Next line is not ld! (%08x)\n", op); + return -EINVAL; + } + + /* + * Milton Miller pointed out that we can not blindly do nops. + * If a task was preempted when calling a trace function, + * the nops will remove the way to restore the TOC in r2 + * and the r2 TOC will get corrupted. + */ + + /* + * Replace: + * bl <tramp> <==== will be replaced with "b 1f" + * ld r2,40(r1) + * 1: + */ + op = 0x48000008; /* b +8 */ + + if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) + return -EPERM; + + + flush_icache_range(ip, ip + 8); + + return 0; +} + +#else /* !PPC64 */ +static int +__ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned int op; + unsigned int jmp[4]; + unsigned long ip = rec->ip; + unsigned long tramp; + + if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* Make sure that that this is still a 24bit jump */ + if (!is_bl_op(op)) { + printk(KERN_ERR "Not expected bl: opcode is %x\n", op); + return -EINVAL; + } + + /* lets find where the pointer goes */ + tramp = find_bl_target(ip, op); + + /* + * On PPC32 the trampoline looks like: + * 0x3d, 0x60, 0x00, 0x00 lis r11,sym@ha + * 0x39, 0x6b, 0x00, 0x00 addi r11,r11,sym@l + * 0x7d, 0x69, 0x03, 0xa6 mtctr r11 + * 0x4e, 0x80, 0x04, 0x20 bctr + */ + + DEBUGP("ip:%lx jumps to %lx", ip, tramp); + + /* Find where the trampoline jumps to */ + if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { + printk(KERN_ERR "Failed to read %lx\n", tramp); + return -EFAULT; + } + + DEBUGP(" %08x %08x ", jmp[0], jmp[1]); + + /* verify that this is what we expect it to be */ + if (((jmp[0] & 0xffff0000) != 0x3d600000) || + ((jmp[1] & 0xffff0000) != 0x396b0000) || + (jmp[2] != 0x7d6903a6) || + (jmp[3] != 0x4e800420)) { + printk(KERN_ERR "Not a trampoline\n"); + return -EINVAL; + } + + tramp = (jmp[1] & 0xffff) | + ((jmp[0] & 0xffff) << 16); + if (tramp & 0x8000) + tramp -= 0x10000; + + DEBUGP(" %x ", tramp); + + if (tramp != addr) { + printk(KERN_ERR + "Trampoline location %08lx does not match addr\n", + tramp); + return -EINVAL; + } + + op = PPC_NOP_INSTR; + + if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) + return -EPERM; + + flush_icache_range(ip, ip + 8); + + return 0; +} +#endif /* PPC64 */ + +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *old, *new; + unsigned long ip = rec->ip; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. + * Otherwise just update the call site. + */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + old = ftrace_call_replace(ip, addr); + new = ftrace_nop_replace(); + return ftrace_modify_code(ip, old, new); + } + + /* + * Out of range jumps are called from modules. + * We should either already have a pointer to the module + * or it has been passed in. + */ + if (!rec->arch.mod) { + if (!mod) { + printk(KERN_ERR "No module loaded addr=%lx\n", + addr); + return -EFAULT; + } + rec->arch.mod = mod; + } else if (mod) { + if (mod != rec->arch.mod) { + printk(KERN_ERR + "Record mod %p not equal to passed in mod %p\n", + rec->arch.mod, mod); + return -EINVAL; + } + /* nothing to do if mod == rec->arch.mod */ + } else + mod = rec->arch.mod; + + return __ftrace_make_nop(mod, rec, addr); + +} + +#ifdef CONFIG_PPC64 +static int +__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned int op[2]; + unsigned long ip = rec->ip; + + /* read where this goes */ + if (probe_kernel_read(op, (void *)ip, MCOUNT_INSN_SIZE * 2)) + return -EFAULT; + + /* + * It should be pointing to two nops or + * b +8; ld r2,40(r1) + */ + if (((op[0] != 0x48000008) || (op[1] != 0xe8410028)) && + ((op[0] != PPC_NOP_INSTR) || (op[1] != PPC_NOP_INSTR))) { + printk(KERN_ERR "Expected NOPs but have %x %x\n", op[0], op[1]); + return -EINVAL; + } + + /* If we never set up a trampoline to ftrace_caller, then bail */ + if (!rec->arch.mod->arch.tramp) { + printk(KERN_ERR "No ftrace trampoline\n"); + return -EINVAL; + } + + /* create the branch to the trampoline */ + op[0] = create_branch((unsigned int *)ip, + rec->arch.mod->arch.tramp, BRANCH_SET_LINK); + if (!op[0]) { + printk(KERN_ERR "REL24 out of range!\n"); + return -EINVAL; + } + + /* ld r2,40(r1) */ + op[1] = 0xe8410028; + + DEBUGP("write to %lx\n", rec->ip); + + if (probe_kernel_write((void *)ip, op, MCOUNT_INSN_SIZE * 2)) + return -EPERM; + + flush_icache_range(ip, ip + 8); + + return 0; +} +#else +static int +__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned int op; + unsigned long ip = rec->ip; + + /* read where this goes */ + if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* It should be pointing to a nop */ + if (op != PPC_NOP_INSTR) { + printk(KERN_ERR "Expected NOP but have %x\n", op); + return -EINVAL; + } + + /* If we never set up a trampoline to ftrace_caller, then bail */ + if (!rec->arch.mod->arch.tramp) { + printk(KERN_ERR "No ftrace trampoline\n"); + return -EINVAL; + } + + /* create the branch to the trampoline */ + op = create_branch((unsigned int *)ip, + rec->arch.mod->arch.tramp, BRANCH_SET_LINK); + if (!op) { + printk(KERN_ERR "REL24 out of range!\n"); + return -EINVAL; + } + + DEBUGP("write to %lx\n", rec->ip); + + if (probe_kernel_write((void *)ip, &op, MCOUNT_INSN_SIZE)) + return -EPERM; + + flush_icache_range(ip, ip + 8); + + return 0; +} +#endif /* CONFIG_PPC64 */ + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *old, *new; + unsigned long ip = rec->ip; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. + * Otherwise just update the call site. + */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + old = ftrace_nop_replace(); + new = ftrace_call_replace(ip, addr); + return ftrace_modify_code(ip, old, new); + } + + /* + * Out of range jumps are called from modules. + * Being that we are converting from nop, it had better + * already have a module defined. + */ + if (!rec->arch.mod) { + printk(KERN_ERR "No module loaded\n"); + return -EINVAL; + } + + return __ftrace_make_call(rec, addr); } int ftrace_update_ftrace_func(ftrace_func_t func) @@ -128,10 +509,10 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int __init ftrace_dyn_arch_init(void *data) { - /* This is running in kstop_machine */ + /* caller expects data to be zero */ + unsigned long *p = data; - ftrace_mcount_set(data); + *p = 0; return 0; } - diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 31982d05d81a..88d9c1d5e5fb 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -69,10 +69,15 @@ void cpu_idle(void) smp_mb(); local_irq_disable(); + /* Don't trace irqs off for idle */ + stop_critical_timings(); + /* check again after disabling irqs */ if (!need_resched() && !cpu_should_die()) ppc_md.power_save(); + start_critical_timings(); + local_irq_enable(); set_thread_flag(TIF_POLLING_NRFLAG); diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 2df91a03462a..f832773fc28e 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -22,6 +22,7 @@ #include <linux/fs.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/ftrace.h> #include <linux/cache.h> #include <linux/bug.h> #include <linux/sort.h> @@ -53,6 +54,9 @@ static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num) r_addend = rela[i].r_addend; } +#ifdef CONFIG_DYNAMIC_FTRACE + _count_relocs++; /* add one for ftrace_caller */ +#endif return _count_relocs; } @@ -306,5 +310,11 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, return -ENOEXEC; } } +#ifdef CONFIG_DYNAMIC_FTRACE + module->arch.tramp = + do_plt_call(module->module_core, + (unsigned long)ftrace_caller, + sechdrs, module); +#endif return 0; } diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 1af2377e4992..8992b031a7b6 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -20,6 +20,7 @@ #include <linux/moduleloader.h> #include <linux/err.h> #include <linux/vmalloc.h> +#include <linux/ftrace.h> #include <linux/bug.h> #include <asm/module.h> #include <asm/firmware.h> @@ -163,6 +164,11 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, } } +#ifdef CONFIG_DYNAMIC_FTRACE + /* make the trampoline to the ftrace_caller */ + relocs++; +#endif + DEBUGP("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); } @@ -441,5 +447,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } } +#ifdef CONFIG_DYNAMIC_FTRACE + me->arch.toc = my_r2(sechdrs, me); + me->arch.tramp = stub_for_addr(sechdrs, + (unsigned long)ftrace_caller, + me); +#endif + return 0; } diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index d69912c07ce7..8db35278a4b4 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -6,6 +6,9 @@ ifeq ($(CONFIG_PPC64),y) EXTRA_CFLAGS += -mno-minimal-toc endif +CFLAGS_REMOVE_code-patching.o = -pg +CFLAGS_REMOVE_feature-fixups.o = -pg + obj-y := string.o alloc.o \ checksum_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index a947899dcba1..bf96f1b5c6ec 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -212,7 +212,7 @@ static void update_cpu_core_map(void) cpu_core_map[cpu] = cpu_coregroup_map(cpu); } -void arch_update_cpu_topology(void) +int arch_update_cpu_topology(void) { struct tl_info *info = tl_info; struct sys_device *sysdev; @@ -221,7 +221,7 @@ void arch_update_cpu_topology(void) if (!machine_has_topology) { update_cpu_core_map(); topology_update_polarization_simple(); - return; + return 0; } stsi(info, 15, 1, 2); tl_to_cores(info); @@ -230,6 +230,7 @@ void arch_update_cpu_topology(void) sysdev = get_cpu_sysdev(cpu); kobject_uevent(&sysdev->kobj, KOBJ_CHANGE); } + return 1; } static void topology_work_fn(struct work_struct *work) diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index 95f0085e098a..279d9cc4a007 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -5,7 +5,6 @@ /* sched_domains SD_NODE_INIT for sh machines */ #define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ .parent = NULL, \ .child = NULL, \ .groups = NULL, \ diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 19d579d74d27..8f44ebb0dec8 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -16,6 +16,8 @@ #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/utsname.h> +#include <linux/socket.h> +#include <linux/un.h> #include <linux/workqueue.h> #include <linux/mutex.h> #include <asm/uaccess.h> @@ -785,7 +787,7 @@ static int __init mconsole_init(void) /* long to avoid size mismatch warnings from gcc */ long sock; int err; - char file[256]; + char file[UNIX_PATH_MAX]; if (umid_file_name("mconsole", file, sizeof(file))) return -1; diff --git a/arch/um/include/asm/system.h b/arch/um/include/asm/system.h index 753346e2cdfd..ae5f94d6317d 100644 --- a/arch/um/include/asm/system.h +++ b/arch/um/include/asm/system.h @@ -11,21 +11,21 @@ extern int get_signals(void); extern void block_signals(void); extern void unblock_signals(void); -#define local_save_flags(flags) do { typecheck(unsigned long, flags); \ +#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \ (flags) = get_signals(); } while(0) -#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \ +#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \ set_signals(flags); } while(0) -#define local_irq_save(flags) do { local_save_flags(flags); \ - local_irq_disable(); } while(0) +#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \ + raw_local_irq_disable(); } while(0) -#define local_irq_enable() unblock_signals() -#define local_irq_disable() block_signals() +#define raw_local_irq_enable() unblock_signals() +#define raw_local_irq_disable() block_signals() #define irqs_disabled() \ ({ \ unsigned long flags; \ - local_save_flags(flags); \ + raw_local_save_flags(flags); \ (flags == 0); \ }) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8943c13502c6..d99eeb7915c6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -29,11 +29,14 @@ config X86 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS + select USER_STACKTRACE_SUPPORT config ARCH_DEFCONFIG string @@ -377,10 +380,10 @@ config X86_RDC321X as R-8610-(G). If you don't have one of these chips, you should say N here. -config SCHED_NO_NO_OMIT_FRAME_POINTER +config SCHED_OMIT_FRAME_POINTER def_bool y prompt "Single-depth WCHAN output" - depends on X86_32 + depends on X86 help Calculate simpler /proc/<PID>/wchan values. If this option is disabled then wchan values will recurse back to the @@ -475,10 +478,6 @@ config X86_CYCLONE_TIMER def_bool y depends on X86_GENERICARCH -config ES7000_CLUSTERED_APIC - def_bool y - depends on SMP && X86_ES7000 && MPENTIUMIII - source "arch/x86/Kconfig.cpu" config HPET_TIMER @@ -1642,13 +1641,6 @@ config APM_ALLOW_INTS many of the newer IBM Thinkpads. If you experience hangs when you suspend, try setting this to Y. Otherwise, say N. -config APM_REAL_MODE_POWER_OFF - bool "Use real mode APM BIOS call to power off" - help - Use real mode APM BIOS calls to switch off the computer. This is - a work-around for a number of buggy BIOSes. Switch this option on if - your computer crashes instead of powering off properly. - endif # APM source "arch/x86/kernel/cpu/cpufreq/Kconfig" diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index b815664fe370..85a78575956c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -515,6 +515,7 @@ config CPU_SUP_UMC_32 config X86_DS def_bool X86_PTRACE_BTS depends on X86_DEBUGCTLMSR + select HAVE_HW_BRANCH_TRACER config X86_PTRACE_BTS bool "Branch Trace Store" diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 2a3dfbd5e677..fa013f529b74 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -186,14 +186,10 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config MMIOTRACE_HOOKS - bool - config MMIOTRACE bool "Memory mapped IO tracing" depends on DEBUG_KERNEL && PCI select TRACING - select MMIOTRACE_HOOKS help Mmiotrace traces Memory Mapped I/O access and is meant for debugging and reverse engineering. It is called from the ioremap diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3b1510b4fc57..25caa0738af5 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -193,6 +193,7 @@ extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask); static inline void lapic_shutdown(void) { } #define local_apic_timer_c2_ok 1 static inline void init_apic_mappings(void) { } +static inline void disable_local_APIC(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 1d9543b9d358..ce547f24a1cd 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h @@ -24,8 +24,6 @@ static inline cpumask_t target_cpus(void) #define INT_DELIVERY_MODE (dest_Fixed) #define INT_DEST_MODE (0) /* phys delivery to target proc */ #define NO_BALANCE_IRQ (0) -#define WAKE_SECONDARY_VIA_INIT - static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index a95008457ea4..99b6c39774a4 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -7,13 +7,12 @@ * * It manages: * - per-thread and per-cpu allocation of BTS and PEBS - * - buffer memory allocation (optional) - * - buffer overflow handling + * - buffer overflow handling (to be done) * - buffer access * * It assumes: - * - get_task_struct on all parameter tasks - * - current is allowed to trace parameter tasks + * - get_task_struct on all traced tasks + * - current is allowed to trace tasks * * * Copyright (C) 2007-2008 Intel Corporation. @@ -26,11 +25,18 @@ #include <linux/types.h> #include <linux/init.h> +#include <linux/err.h> #ifdef CONFIG_X86_DS struct task_struct; +struct ds_tracer; +struct bts_tracer; +struct pebs_tracer; + +typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); +typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); /* * Request BTS or PEBS @@ -38,60 +44,62 @@ struct task_struct; * Due to alignement constraints, the actual buffer may be slightly * smaller than the requested or provided buffer. * - * Returns 0 on success; -Eerrno otherwise + * Returns a pointer to a tracer structure on success, or + * ERR_PTR(errcode) on failure. + * + * The interrupt threshold is independent from the overflow callback + * to allow users to use their own overflow interrupt handling mechanism. * * task: the task to request recording for; * NULL for per-cpu recording on the current cpu * base: the base pointer for the (non-pageable) buffer; - * NULL if buffer allocation requested - * size: the size of the requested or provided buffer + * size: the size of the provided buffer in bytes * ovfl: pointer to a function to be called on buffer overflow; * NULL if cyclic buffer requested + * th: the interrupt threshold in records from the end of the buffer; + * -1 if no interrupt threshold is requested. */ -typedef void (*ds_ovfl_callback_t)(struct task_struct *); -extern int ds_request_bts(struct task_struct *task, void *base, size_t size, - ds_ovfl_callback_t ovfl); -extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, - ds_ovfl_callback_t ovfl); +extern struct bts_tracer *ds_request_bts(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, size_t th); +extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th); /* * Release BTS or PEBS resources * - * Frees buffers allocated on ds_request. - * * Returns 0 on success; -Eerrno otherwise * - * task: the task to release resources for; - * NULL to release resources for the current cpu + * tracer: the tracer handle returned from ds_request_~() */ -extern int ds_release_bts(struct task_struct *task); -extern int ds_release_pebs(struct task_struct *task); +extern int ds_release_bts(struct bts_tracer *tracer); +extern int ds_release_pebs(struct pebs_tracer *tracer); /* - * Return the (array) index of the write pointer. + * Get the (array) index of the write pointer. * (assuming an array of BTS/PEBS records) * - * Returns -Eerrno on error + * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu - * pos (out): if not NULL, will hold the result + * tracer: the tracer handle returned from ds_request_~() + * pos (out): will hold the result */ -extern int ds_get_bts_index(struct task_struct *task, size_t *pos); -extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); +extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos); +extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos); /* - * Return the (array) index one record beyond the end of the array. + * Get the (array) index one record beyond the end of the array. * (assuming an array of BTS/PEBS records) * - * Returns -Eerrno on error + * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu - * pos (out): if not NULL, will hold the result + * tracer: the tracer handle returned from ds_request_~() + * pos (out): will hold the result */ -extern int ds_get_bts_end(struct task_struct *task, size_t *pos); -extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); +extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos); +extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos); /* * Provide a pointer to the BTS/PEBS record at parameter index. @@ -102,14 +110,13 @@ extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); * * Returns the size of a single record on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_~() * index: the index of the requested record * record (out): pointer to the requested record */ -extern int ds_access_bts(struct task_struct *task, +extern int ds_access_bts(struct bts_tracer *tracer, size_t index, const void **record); -extern int ds_access_pebs(struct task_struct *task, +extern int ds_access_pebs(struct pebs_tracer *tracer, size_t index, const void **record); /* @@ -129,38 +136,24 @@ extern int ds_access_pebs(struct task_struct *task, * * Returns the number of bytes written or -Eerrno. * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_~() * buffer: the buffer to write * size: the size of the buffer */ -extern int ds_write_bts(struct task_struct *task, +extern int ds_write_bts(struct bts_tracer *tracer, const void *buffer, size_t size); -extern int ds_write_pebs(struct task_struct *task, +extern int ds_write_pebs(struct pebs_tracer *tracer, const void *buffer, size_t size); /* - * Same as ds_write_bts/pebs, but omit ownership checks. - * - * This is needed to have some other task than the owner of the - * BTS/PEBS buffer or the parameter task itself write into the - * respective buffer. - */ -extern int ds_unchecked_write_bts(struct task_struct *task, - const void *buffer, size_t size); -extern int ds_unchecked_write_pebs(struct task_struct *task, - const void *buffer, size_t size); - -/* * Reset the write pointer of the BTS/PEBS buffer. * * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_~() */ -extern int ds_reset_bts(struct task_struct *task); -extern int ds_reset_pebs(struct task_struct *task); +extern int ds_reset_bts(struct bts_tracer *tracer); +extern int ds_reset_pebs(struct pebs_tracer *tracer); /* * Clear the BTS/PEBS buffer and reset the write pointer. @@ -168,33 +161,30 @@ extern int ds_reset_pebs(struct task_struct *task); * * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_~() */ -extern int ds_clear_bts(struct task_struct *task); -extern int ds_clear_pebs(struct task_struct *task); +extern int ds_clear_bts(struct bts_tracer *tracer); +extern int ds_clear_pebs(struct pebs_tracer *tracer); /* * Provide the PEBS counter reset value. * * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_pebs() * value (out): the counter reset value */ -extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); +extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value); /* * Set the PEBS counter reset value. * * Returns 0 on success; -Eerrno on error * - * task: the task to access; - * NULL to access the current cpu + * tracer: the tracer handle returned from ds_request_pebs() * value: the new counter reset value */ -extern int ds_set_pebs_reset(struct task_struct *task, u64 value); +extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); /* * Initialization @@ -207,17 +197,13 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); /* * The DS context - part of struct thread_struct. */ +#define MAX_SIZEOF_DS (12 * 8) + struct ds_context { /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ - unsigned char *ds; + unsigned char ds[MAX_SIZEOF_DS]; /* the owner of the BTS and PEBS configuration, respectively */ - struct task_struct *owner[2]; - /* buffer overflow notification function for BTS and PEBS */ - ds_ovfl_callback_t callback[2]; - /* the original buffer address */ - void *buffer[2]; - /* the number of allocated pages for on-request allocated buffers */ - unsigned int pages[2]; + struct ds_tracer *owner[2]; /* use count */ unsigned long count; /* a pointer to the context location inside the thread_struct diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index 94826cf87455..cc70c1c78ca4 100644 --- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h @@ -8,7 +8,9 @@ enum reboot_type { BOOT_BIOS = 'b', #endif BOOT_ACPI = 'a', - BOOT_EFI = 'e' + BOOT_EFI = 'e', + BOOT_CF9 = 'p', + BOOT_CF9_COND = 'q', }; extern enum reboot_type reboot_type; diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 380f0b4f17ed..e24ef876915f 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h @@ -9,31 +9,27 @@ static inline int apic_id_registered(void) return (1); } -static inline cpumask_t target_cpus(void) +static inline cpumask_t target_cpus_cluster(void) { -#if defined CONFIG_ES7000_CLUSTERED_APIC return CPU_MASK_ALL; -#else +} + +static inline cpumask_t target_cpus(void) +{ return cpumask_of_cpu(smp_processor_id()); -#endif } -#if defined CONFIG_ES7000_CLUSTERED_APIC -#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) -#define INT_DELIVERY_MODE (dest_LowestPrio) -#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ -#define NO_BALANCE_IRQ (1) -#undef WAKE_SECONDARY_VIA_INIT -#define WAKE_SECONDARY_VIA_MIP -#else +#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) +#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) +#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ +#define NO_BALANCE_IRQ_CLUSTER (1) + #define APIC_DFR_VALUE (APIC_DFR_FLAT) #define INT_DELIVERY_MODE (dest_Fixed) #define INT_DEST_MODE (0) /* phys delivery to target procs */ #define NO_BALANCE_IRQ (0) #undef APIC_DEST_LOGICAL #define APIC_DEST_LOGICAL 0x0 -#define WAKE_SECONDARY_VIA_INIT -#endif static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) { @@ -60,6 +56,16 @@ static inline unsigned long calculate_ldr(int cpu) * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel * document number 292116). So here it goes... */ +static inline void init_apic_ldr_cluster(void) +{ + unsigned long val; + int cpu = smp_processor_id(); + + apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); + val = calculate_ldr(cpu); + apic_write(APIC_LDR, val); +} + static inline void init_apic_ldr(void) { unsigned long val; @@ -70,10 +76,6 @@ static inline void init_apic_ldr(void) apic_write(APIC_LDR, val); } -#ifndef CONFIG_X86_GENERICARCH -extern void enable_apic_mode(void); -#endif - extern int apic_version [MAX_APICS]; static inline void setup_apic_routing(void) { @@ -144,7 +146,7 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid) return (1); } -static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +static inline unsigned int cpu_mask_to_apicid_cluster(cpumask_t cpumask) { int num_bits_set; int cpus_found = 0; @@ -154,11 +156,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) num_bits_set = cpus_weight(cpumask); /* Return id to all */ if (num_bits_set == NR_CPUS) -#if defined CONFIG_ES7000_CLUSTERED_APIC return 0xFF; -#else - return cpu_to_logical_apicid(0); -#endif /* * The cpus in the mask must all be on the apic cluster. If are not * on the same apicid cluster return default value of TARGET_CPUS. @@ -171,11 +169,40 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) if (apicid_cluster(apicid) != apicid_cluster(new_apicid)){ printk ("%s: Not a valid mask!\n", __func__); -#if defined CONFIG_ES7000_CLUSTERED_APIC return 0xFF; -#else + } + apicid = new_apicid; + cpus_found++; + } + cpu++; + } + return apicid; +} + +static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) +{ + int num_bits_set; + int cpus_found = 0; + int cpu; + int apicid; + + num_bits_set = cpus_weight(cpumask); + /* Return id to all */ + if (num_bits_set == NR_CPUS) + return cpu_to_logical_apicid(0); + /* + * The cpus in the mask must all be on the apic cluster. If are not + * on the same apicid cluster return default value of TARGET_CPUS. + */ + cpu = first_cpu(cpumask); + apicid = cpu_to_logical_apicid(cpu); + while (cpus_found < num_bits_set) { + if (cpu_isset(cpu, cpumask)) { + int new_apicid = cpu_to_logical_apicid(cpu); + if (apicid_cluster(apicid) != + apicid_cluster(new_apicid)){ + printk ("%s: Not a valid mask!\n", __func__); return cpu_to_logical_apicid(0); -#endif } apicid = new_apicid; cpus_found++; diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 398493461913..78f0daaee436 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -1,36 +1,12 @@ #ifndef __ASM_ES7000_WAKECPU_H #define __ASM_ES7000_WAKECPU_H -/* - * This file copes with machines that wakeup secondary CPUs by the - * INIT, INIT, STARTUP sequence. - */ - -#ifdef CONFIG_ES7000_CLUSTERED_APIC -#define WAKE_SECONDARY_VIA_MIP -#else -#define WAKE_SECONDARY_VIA_INIT -#endif - -#ifdef WAKE_SECONDARY_VIA_MIP -extern int es7000_start_cpu(int cpu, unsigned long eip); -static inline int -wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) -{ - int boot_error = 0; - boot_error = es7000_start_cpu(phys_apicid, start_eip); - return boot_error; -} -#endif - -#define TRAMPOLINE_LOW phys_to_virt(0x467) -#define TRAMPOLINE_HIGH phys_to_virt(0x469) - -#define boot_cpu_apicid boot_cpu_physical_apicid +#define TRAMPOLINE_PHYS_LOW 0x467 +#define TRAMPOLINE_PHYS_HIGH 0x469 static inline void wait_for_init_deassert(atomic_t *deassert) { -#ifdef WAKE_SECONDARY_VIA_INIT +#ifndef CONFIG_ES7000_CLUSTERED_APIC while (!atomic_read(deassert)) cpu_relax(); #endif @@ -50,9 +26,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { } -#define inquire_remote_apic(apicid) do { \ - if (apic_verbosity >= APIC_DEBUG) \ - __inquire_remote_apic(apicid); \ - } while (0) +extern void __inquire_remote_apic(int apicid); + +static inline void inquire_remote_apic(int apicid) +{ + if (apic_verbosity >= APIC_DEBUG) + __inquire_remote_apic(apicid); +} #endif /* __ASM_MACH_WAKECPU_H */ diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9e8bc29b8b17..7e61b4ceb9a4 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -17,8 +17,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) */ return addr - 1; } -#endif +#ifdef CONFIG_DYNAMIC_FTRACE + +struct dyn_arch_ftrace { + /* No extra data needed for x86 */ +}; + +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +#ifndef __ASSEMBLY__ + +/* + * Stack of return addresses for functions + * of a thread. + * Used in struct thread_info + */ +struct ftrace_ret_stack { + unsigned long ret; + unsigned long func; + unsigned long long calltime; +}; + +/* + * Primary handler of a function return. + * It relays on ftrace_return_to_handler. + * Defined in entry32.S + */ +extern void return_to_handler(void); + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 5cbd4fcc06fd..0ac17d33a8c7 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h @@ -2,6 +2,7 @@ #define _ASM_X86_GENAPIC_32_H #include <asm/mpspec.h> +#include <asm/atomic.h> /* * Generic APIC driver interface. @@ -65,6 +66,14 @@ struct genapic { void (*send_IPI_allbutself)(int vector); void (*send_IPI_all)(int vector); #endif + int (*wakeup_cpu)(int apicid, unsigned long start_eip); + int trampoline_phys_low; + int trampoline_phys_high; + void (*wait_for_init_deassert)(atomic_t *deassert); + void (*smp_callin_clear_local_apic)(void); + void (*store_NMI_vector)(unsigned short *high, unsigned short *low); + void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); + void (*inquire_remote_apic)(int apicid); }; #define APICFUNC(x) .x = x, @@ -105,16 +114,24 @@ struct genapic { APICFUNC(get_apic_id) \ .apic_id_mask = APIC_ID_MASK, \ APICFUNC(cpu_mask_to_apicid) \ - APICFUNC(vector_allocation_domain) \ + APICFUNC(vector_allocation_domain) \ APICFUNC(acpi_madt_oem_check) \ IPIFUNC(send_IPI_mask) \ IPIFUNC(send_IPI_allbutself) \ IPIFUNC(send_IPI_all) \ APICFUNC(enable_apic_mode) \ APICFUNC(phys_pkg_id) \ + .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ + .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ + APICFUNC(wait_for_init_deassert) \ + APICFUNC(smp_callin_clear_local_apic) \ + APICFUNC(store_NMI_vector) \ + APICFUNC(restore_NMI_vector) \ + APICFUNC(inquire_remote_apic) \ } extern struct genapic *genapic; +extern void es7000_update_genapic_to_cluster(void); enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; #define get_uv_system_type() UV_NONE diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 13c4e96199ea..2cae011668b7 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h @@ -32,6 +32,8 @@ struct genapic { unsigned int (*get_apic_id)(unsigned long x); unsigned long (*set_apic_id)(unsigned int id); unsigned long apic_id_mask; + /* wakeup_secondary_cpu */ + int (*wakeup_cpu)(int apicid, unsigned long start_eip); }; extern struct genapic *genapic; diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index ff3a6c236c00..6cb3a467e067 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h @@ -32,11 +32,13 @@ static inline cpumask_t target_cpus(void) #define vector_allocation_domain (genapic->vector_allocation_domain) #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) #define send_IPI_self (genapic->send_IPI_self) +#define wakeup_secondary_cpu (genapic->wakeup_cpu) extern void setup_apic_routing(void); #else #define INT_DELIVERY_MODE dest_LowestPrio #define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ #define TARGET_CPUS (target_cpus()) +#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init /* * Set up the logical destination ID. * diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index 9d80db91e992..ceb013660146 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -1,17 +1,8 @@ #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H -/* - * This file copes with machines that wakeup secondary CPUs by the - * INIT, INIT, STARTUP sequence. - */ - -#define WAKE_SECONDARY_VIA_INIT - -#define TRAMPOLINE_LOW phys_to_virt(0x467) -#define TRAMPOLINE_HIGH phys_to_virt(0x469) - -#define boot_cpu_apicid boot_cpu_physical_apicid +#define TRAMPOLINE_PHYS_LOW (0x467) +#define TRAMPOLINE_PHYS_HIGH (0x469) static inline void wait_for_init_deassert(atomic_t *deassert) { @@ -33,9 +24,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { } -#define inquire_remote_apic(apicid) do { \ - if (apic_verbosity >= APIC_DEBUG) \ - __inquire_remote_apic(apicid); \ - } while (0) +extern void __inquire_remote_apic(int apicid); + +static inline void inquire_remote_apic(int apicid) +{ + if (apic_verbosity >= APIC_DEBUG) + __inquire_remote_apic(apicid); +} #endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h index dbab36d64d48..23bf52103b89 100644 --- a/arch/x86/include/asm/mach-default/smpboot_hooks.h +++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h @@ -13,9 +13,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) CMOS_WRITE(0xa, 0xf); local_flush_tlb(); pr_debug("1.\n"); - *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = + start_eip >> 4; pr_debug("2.\n"); - *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = + start_eip & 0xf; pr_debug("3.\n"); } @@ -32,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void) */ CMOS_WRITE(0, 0xf); - *((volatile long *) phys_to_virt(0x467)) = 0; + *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; } static inline void __init smpboot_setup_io_apic(void) diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 5180bd7478fb..e430f47df667 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h @@ -27,6 +27,7 @@ #define vector_allocation_domain (genapic->vector_allocation_domain) #define enable_apic_mode (genapic->enable_apic_mode) #define phys_pkg_id (genapic->phys_pkg_id) +#define wakeup_secondary_cpu (genapic->wakeup_cpu) extern void generic_bigsmp_probe(void); diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h new file mode 100644 index 000000000000..1ab16b168c8a --- /dev/null +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h @@ -0,0 +1,12 @@ +#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H +#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H + +#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low) +#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high) +#define wait_for_init_deassert (genapic->wait_for_init_deassert) +#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic) +#define store_NMI_vector (genapic->store_NMI_vector) +#define restore_NMI_vector (genapic->restore_NMI_vector) +#define inquire_remote_apic (genapic->inquire_remote_apic) + +#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index c577bda5b1c5..6f499df8eddb 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h @@ -3,12 +3,8 @@ /* This file copes with machines that wakeup secondary CPUs by NMIs */ -#define WAKE_SECONDARY_VIA_NMI - -#define TRAMPOLINE_LOW phys_to_virt(0x8) -#define TRAMPOLINE_HIGH phys_to_virt(0xa) - -#define boot_cpu_apicid boot_cpu_logical_apicid +#define TRAMPOLINE_PHYS_LOW (0x8) +#define TRAMPOLINE_PHYS_HIGH (0xa) /* We don't do anything here because we use NMI's to boot instead */ static inline void wait_for_init_deassert(atomic_t *deassert) @@ -27,17 +23,23 @@ static inline void smp_callin_clear_local_apic(void) static inline void store_NMI_vector(unsigned short *high, unsigned short *low) { printk("Storing NMI vector\n"); - *high = *((volatile unsigned short *) TRAMPOLINE_HIGH); - *low = *((volatile unsigned short *) TRAMPOLINE_LOW); + *high = + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)); + *low = + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)); } static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { printk("Restoring NMI vector\n"); - *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high; - *((volatile unsigned short *) TRAMPOLINE_LOW) = *low; + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = + *high; + *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = + *low; } -#define inquire_remote_apic(apicid) {} +static inline void inquire_remote_apic(int apicid) +{ +} #endif /* __ASM_NUMAQ_WAKECPU_H */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index f12d37237465..294daeb3a006 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -16,6 +16,8 @@ static inline void visws_early_detect(void) { } static inline int is_visws_box(void) { return 0; } #endif +extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); +extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); /* * Any setup quirks to be performed? */ @@ -39,6 +41,7 @@ struct x86_quirks { void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, unsigned short oemsize); int (*setup_ioapic_ids)(void); + int (*update_genapic)(void); }; extern struct x86_quirks *x86_quirks; diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 2ed3f0f44ff7..07c3e4048991 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -314,6 +314,8 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end); void default_idle(void); +void stop_this_cpu(void *dummy); + /* * Force strict CPU ordering. * And yes, this is required on UP too when we're talking diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e44d379faad2..0921b4018c11 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -20,6 +20,8 @@ struct task_struct; struct exec_domain; #include <asm/processor.h> +#include <asm/ftrace.h> +#include <asm/atomic.h> struct thread_info { struct task_struct *task; /* main task structure */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 35c54921b2e4..99192bb55a53 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -157,6 +157,7 @@ extern int __get_user_bad(void); int __ret_gu; \ unsigned long __val_gu; \ __chk_user_ptr(ptr); \ + might_fault(); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_x(1, __ret_gu, __val_gu, ptr); \ @@ -241,6 +242,7 @@ extern void __put_user_8(void); int __ret_pu; \ __typeof__(*(ptr)) __pu_val; \ __chk_user_ptr(ptr); \ + might_fault(); \ __pu_val = x; \ switch (sizeof(*(ptr))) { \ case 1: \ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index d095a3aeea1b..5e06259e90e5 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) static __always_inline unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n) { - might_sleep(); - return __copy_to_user_inatomic(to, from, n); + might_fault(); + return __copy_to_user_inatomic(to, from, n); } static __always_inline unsigned long @@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { - might_sleep(); + might_fault(); if (__builtin_constant_p(n)) { unsigned long ret; @@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline unsigned long __copy_from_user_nocache(void *to, const void __user *from, unsigned long n) { - might_sleep(); + might_fault(); if (__builtin_constant_p(n)) { unsigned long ret; diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f8cfd00db450..84210c479fca 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -29,6 +29,8 @@ static __always_inline __must_check int __copy_from_user(void *dst, const void __user *src, unsigned size) { int ret = 0; + + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic(dst, (__force void *)src, size); switch (size) { @@ -71,6 +73,8 @@ static __always_inline __must_check int __copy_to_user(void __user *dst, const void *src, unsigned size) { int ret = 0; + + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, src, size); switch (size) { @@ -113,6 +117,8 @@ static __always_inline __must_check int __copy_in_user(void __user *dst, const void __user *src, unsigned size) { int ret = 0; + + might_fault(); if (!__builtin_constant_p(size)) return copy_user_generic((__force void *)dst, (__force void *)src, size); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b62a7667828e..1cad9318d217 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -25,7 +25,7 @@ CFLAGS_tsc.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o -obj-y += time_$(BITS).o ioport.o ldt.o +obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o @@ -65,6 +65,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 4c51a2f8fd31..65d0b72777ea 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1360,6 +1360,17 @@ static void __init acpi_process_madt(void) disable_acpi(); } } + + /* + * ACPI supports both logical (e.g. Hyper-Threading) and physical + * processors, where MPS only supports physical. + */ + if (acpi_lapic && acpi_ioapic) + printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " + "information\n"); + else if (acpi_lapic) + printk(KERN_INFO "Using ACPI for processor (LAPIC) " + "configuration information\n"); #endif return; } diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 5662e226b0c9..a7b6dec6fc3f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -344,7 +344,7 @@ static int iommu_map(struct protection_domain *dom, u64 __pte, *pte, *page; bus_addr = PAGE_ALIGN(bus_addr); - phys_addr = PAGE_ALIGN(bus_addr); + phys_addr = PAGE_ALIGN(phys_addr); /* only support 512GB address spaces for now */ if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) @@ -600,7 +600,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) continue; p2 = IOMMU_PTE_PAGE(p1[i]); - for (j = 0; j < 512; ++i) { + for (j = 0; j < 512; ++j) { if (!IOMMU_PTE_PRESENT(p2[j])) continue; p3 = IOMMU_PTE_PAGE(p2[j]); @@ -910,7 +910,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, if (address >= dom->aperture_size) return; - WARN_ON(address & 0xfffULL || address > dom->aperture_size); + WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; pte += IOMMU_PTE_L0_INDEX(address); @@ -922,8 +922,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, /* * This function contains common code for mapping of a physically - * contiguous memory region into DMA address space. It is uses by all - * mapping functions provided by this IOMMU driver. + * contiguous memory region into DMA address space. It is used by all + * mapping functions provided with this IOMMU driver. * Must be called with the domain lock held. */ static dma_addr_t __map_single(struct device *dev, @@ -983,7 +983,8 @@ static void __unmap_single(struct amd_iommu *iommu, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) + if ((dma_addr == bad_dma_address) || + (dma_addr + size > dma_dom->aperture_size)) return; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 5145a6e72bbb..3a26525a3f31 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -391,11 +391,7 @@ static int power_off; #else static int power_off = 1; #endif -#ifdef CONFIG_APM_REAL_MODE_POWER_OFF -static int realmode_power_off = 1; -#else static int realmode_power_off; -#endif #ifdef CONFIG_APM_ALLOW_INTS static int allow_ints = 1; #else diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8e48c5d4467d..88ea02dcb622 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -33,6 +33,7 @@ #include <linux/cpufreq.h> #include <linux/compiler.h> #include <linux/dmi.h> +#include <linux/ftrace.h> #include <linux/acpi.h> #include <acpi/processor.h> @@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int next_perf_state = 0; /* Index into perf table */ unsigned int i; int result = 0; + struct power_trace it; dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); @@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, } } + trace_power_mark(&it, POWER_PSTATE, next_perf_state); + switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index cce0b6118d55..816f27f289b1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -307,12 +307,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_P4); if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_P3); +#endif if (cpu_has_bts) ptrace_bts_init_intel(c); -#endif - detect_extended_topology(c); if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { /* diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index a2d1176c38ee..19a8c2c0389f 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -7,13 +7,12 @@ * * It manages: * - per-thread and per-cpu allocation of BTS and PEBS - * - buffer memory allocation (optional) - * - buffer overflow handling + * - buffer overflow handling (to be done) * - buffer access * * It assumes: - * - get_task_struct on all parameter tasks - * - current is allowed to trace parameter tasks + * - get_task_struct on all traced tasks + * - current is allowed to trace tasks * * * Copyright (C) 2007-2008 Intel Corporation. @@ -28,6 +27,7 @@ #include <linux/slab.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/kernel.h> /* @@ -44,6 +44,33 @@ struct ds_configuration { }; static struct ds_configuration ds_cfg; +/* + * A BTS or PEBS tracer. + * + * This holds the configuration of the tracer and serves as a handle + * to identify tracers. + */ +struct ds_tracer { + /* the DS context (partially) owned by this tracer */ + struct ds_context *context; + /* the buffer provided on ds_request() and its size in bytes */ + void *buffer; + size_t size; +}; + +struct bts_tracer { + /* the common DS part */ + struct ds_tracer ds; + /* buffer overflow notification function */ + bts_ovfl_callback_t ovfl; +}; + +struct pebs_tracer { + /* the common DS part */ + struct ds_tracer ds; + /* buffer overflow notification function */ + pebs_ovfl_callback_t ovfl; +}; /* * Debug Store (DS) save area configuration (see Intel64 and IA32 @@ -107,34 +134,13 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, (*(unsigned long *)base) = value; } +#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ -/* - * Locking is done only for allocating BTS or PEBS resources and for - * guarding context and buffer memory allocation. - * - * Most functions require the current task to own the ds context part - * they are going to access. All the locking is done when validating - * access to the context. - */ -static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); /* - * Validate that the current task is allowed to access the BTS/PEBS - * buffer of the parameter task. - * - * Returns 0, if access is granted; -Eerrno, otherwise. + * Locking is done only for allocating BTS or PEBS resources. */ -static inline int ds_validate_access(struct ds_context *context, - enum ds_qualifier qual) -{ - if (!context) - return -EPERM; - - if (context->owner[qual] == current) - return 0; - - return -EPERM; -} +static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); /* @@ -183,51 +189,13 @@ static inline int check_tracer(struct task_struct *task) * * Contexts are use-counted. They are allocated on first access and * deallocated when the last user puts the context. - * - * We distinguish between an allocating and a non-allocating get of a - * context: - * - the allocating get is used for requesting BTS/PEBS resources. It - * requires the caller to hold the global ds_lock. - * - the non-allocating get is used for all other cases. A - * non-existing context indicates an error. It acquires and releases - * the ds_lock itself for obtaining the context. - * - * A context and its DS configuration are allocated and deallocated - * together. A context always has a DS configuration of the - * appropriate size. */ static DEFINE_PER_CPU(struct ds_context *, system_context); #define this_system_context per_cpu(system_context, smp_processor_id()) -/* - * Returns the pointer to the parameter task's context or to the - * system-wide context, if task is NULL. - * - * Increases the use count of the returned context, if not NULL. - */ static inline struct ds_context *ds_get_context(struct task_struct *task) { - struct ds_context *context; - unsigned long irq; - - spin_lock_irqsave(&ds_lock, irq); - - context = (task ? task->thread.ds_ctx : this_system_context); - if (context) - context->count++; - - spin_unlock_irqrestore(&ds_lock, irq); - - return context; -} - -/* - * Same as ds_get_context, but allocates the context and it's DS - * structure, if necessary; returns NULL; if out of memory. - */ -static inline struct ds_context *ds_alloc_context(struct task_struct *task) -{ struct ds_context **p_context = (task ? &task->thread.ds_ctx : &this_system_context); struct ds_context *context = *p_context; @@ -238,16 +206,9 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) if (!context) return NULL; - context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); - if (!context->ds) { - kfree(context); - return NULL; - } - spin_lock_irqsave(&ds_lock, irq); if (*p_context) { - kfree(context->ds); kfree(context); context = *p_context; @@ -272,10 +233,6 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) return context; } -/* - * Decreases the use count of the parameter context, if not NULL. - * Deallocates the context, if the use count reaches zero. - */ static inline void ds_put_context(struct ds_context *context) { unsigned long irq; @@ -296,13 +253,6 @@ static inline void ds_put_context(struct ds_context *context) if (!context->task || (context->task == current)) wrmsrl(MSR_IA32_DS_AREA, 0); - put_tracer(context->task); - - /* free any leftover buffers from tracers that did not - * deallocate them properly. */ - kfree(context->buffer[ds_bts]); - kfree(context->buffer[ds_pebs]); - kfree(context->ds); kfree(context); out: spin_unlock_irqrestore(&ds_lock, irq); @@ -312,345 +262,342 @@ static inline void ds_put_context(struct ds_context *context) /* * Handle a buffer overflow * - * task: the task whose buffers are overflowing; - * NULL for a buffer overflow on the current cpu * context: the ds context * qual: the buffer type */ -static void ds_overflow(struct task_struct *task, struct ds_context *context, - enum ds_qualifier qual) -{ - if (!context) - return; - - if (context->callback[qual]) - (*context->callback[qual])(task); - - /* todo: do some more overflow handling */ +static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) +{ + switch (qual) { + case ds_bts: { + struct bts_tracer *tracer = + container_of(context->owner[qual], + struct bts_tracer, ds); + if (tracer->ovfl) + tracer->ovfl(tracer); + } + break; + case ds_pebs: { + struct pebs_tracer *tracer = + container_of(context->owner[qual], + struct pebs_tracer, ds); + if (tracer->ovfl) + tracer->ovfl(tracer); + } + break; + } } -/* - * Allocate a non-pageable buffer of the parameter size. - * Checks the memory and the locked memory rlimit. - * - * Returns the buffer, if successful; - * NULL, if out of memory or rlimit exceeded. - * - * size: the requested buffer size in bytes - * pages (out): if not NULL, contains the number of pages reserved - */ -static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) +static void ds_install_ds_config(struct ds_context *context, + enum ds_qualifier qual, + void *base, size_t size, size_t ith) { - unsigned long rlim, vm, pgsz; - void *buffer; - - pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; - - rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; - vm = current->mm->total_vm + pgsz; - if (rlim < vm) - return NULL; + unsigned long buffer, adj; - rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - vm = current->mm->locked_vm + pgsz; - if (rlim < vm) - return NULL; + /* adjust the buffer address and size to meet alignment + * constraints: + * - buffer is double-word aligned + * - size is multiple of record size + * + * We checked the size at the very beginning; we have enough + * space to do the adjustment. + */ + buffer = (unsigned long)base; - buffer = kzalloc(size, GFP_KERNEL); - if (!buffer) - return NULL; + adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; + buffer += adj; + size -= adj; - current->mm->total_vm += pgsz; - current->mm->locked_vm += pgsz; + size /= ds_cfg.sizeof_rec[qual]; + size *= ds_cfg.sizeof_rec[qual]; - if (pages) - *pages = pgsz; + ds_set(context->ds, qual, ds_buffer_base, buffer); + ds_set(context->ds, qual, ds_index, buffer); + ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); - return buffer; + /* The value for 'no threshold' is -1, which will set the + * threshold outside of the buffer, just like we want it. + */ + ds_set(context->ds, qual, + ds_interrupt_threshold, buffer + size - ith); } -static int ds_request(struct task_struct *task, void *base, size_t size, - ds_ovfl_callback_t ovfl, enum ds_qualifier qual) +static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual, + struct task_struct *task, + void *base, size_t size, size_t th) { struct ds_context *context; - unsigned long buffer, adj; - const unsigned long alignment = (1 << 3); unsigned long irq; - int error = 0; + int error; + error = -EOPNOTSUPP; if (!ds_cfg.sizeof_ds) - return -EOPNOTSUPP; + goto out; + + error = -EINVAL; + if (!base) + goto out; /* we require some space to do alignment adjustments below */ - if (size < (alignment + ds_cfg.sizeof_rec[qual])) - return -EINVAL; + error = -EINVAL; + if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) + goto out; - /* buffer overflow notification is not yet implemented */ - if (ovfl) - return -EOPNOTSUPP; + if (th != (size_t)-1) { + th *= ds_cfg.sizeof_rec[qual]; + + error = -EINVAL; + if (size <= th) + goto out; + } + tracer->buffer = base; + tracer->size = size; - context = ds_alloc_context(task); + error = -ENOMEM; + context = ds_get_context(task); if (!context) - return -ENOMEM; + goto out; + tracer->context = context; + spin_lock_irqsave(&ds_lock, irq); error = -EPERM; if (!check_tracer(task)) goto out_unlock; - get_tracer(task); - error = -EALREADY; - if (context->owner[qual] == current) - goto out_put_tracer; error = -EPERM; - if (context->owner[qual] != NULL) + if (context->owner[qual]) goto out_put_tracer; - context->owner[qual] = current; + context->owner[qual] = tracer; spin_unlock_irqrestore(&ds_lock, irq); - error = -ENOMEM; - if (!base) { - base = ds_allocate_buffer(size, &context->pages[qual]); - if (!base) - goto out_release; - - context->buffer[qual] = base; - } - error = 0; + ds_install_ds_config(context, qual, base, size, th); - context->callback[qual] = ovfl; - - /* adjust the buffer address and size to meet alignment - * constraints: - * - buffer is double-word aligned - * - size is multiple of record size - * - * We checked the size at the very beginning; we have enough - * space to do the adjustment. - */ - buffer = (unsigned long)base; - - adj = ALIGN(buffer, alignment) - buffer; - buffer += adj; - size -= adj; - - size /= ds_cfg.sizeof_rec[qual]; - size *= ds_cfg.sizeof_rec[qual]; - - ds_set(context->ds, qual, ds_buffer_base, buffer); - ds_set(context->ds, qual, ds_index, buffer); - ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); - - if (ovfl) { - /* todo: select a suitable interrupt threshold */ - } else - ds_set(context->ds, qual, - ds_interrupt_threshold, buffer + size + 1); - - /* we keep the context until ds_release */ - return error; - - out_release: - context->owner[qual] = NULL; - ds_put_context(context); - put_tracer(task); - return error; + return 0; out_put_tracer: - spin_unlock_irqrestore(&ds_lock, irq); - ds_put_context(context); put_tracer(task); - return error; - out_unlock: spin_unlock_irqrestore(&ds_lock, irq); ds_put_context(context); + tracer->context = NULL; + out: return error; } -int ds_request_bts(struct task_struct *task, void *base, size_t size, - ds_ovfl_callback_t ovfl) +struct bts_tracer *ds_request_bts(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, size_t th) { - return ds_request(task, base, size, ovfl, ds_bts); -} + struct bts_tracer *tracer; + int error; -int ds_request_pebs(struct task_struct *task, void *base, size_t size, - ds_ovfl_callback_t ovfl) -{ - return ds_request(task, base, size, ovfl, ds_pebs); + /* buffer overflow notification is not yet implemented */ + error = -EOPNOTSUPP; + if (ovfl) + goto out; + + error = -ENOMEM; + tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); + if (!tracer) + goto out; + tracer->ovfl = ovfl; + + error = ds_request(&tracer->ds, ds_bts, task, base, size, th); + if (error < 0) + goto out_tracer; + + return tracer; + + out_tracer: + kfree(tracer); + out: + return ERR_PTR(error); } -static int ds_release(struct task_struct *task, enum ds_qualifier qual) +struct pebs_tracer *ds_request_pebs(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, size_t th) { - struct ds_context *context; + struct pebs_tracer *tracer; int error; - context = ds_get_context(task); - error = ds_validate_access(context, qual); - if (error < 0) + /* buffer overflow notification is not yet implemented */ + error = -EOPNOTSUPP; + if (ovfl) goto out; - kfree(context->buffer[qual]); - context->buffer[qual] = NULL; + error = -ENOMEM; + tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); + if (!tracer) + goto out; + tracer->ovfl = ovfl; - current->mm->total_vm -= context->pages[qual]; - current->mm->locked_vm -= context->pages[qual]; - context->pages[qual] = 0; - context->owner[qual] = NULL; + error = ds_request(&tracer->ds, ds_pebs, task, base, size, th); + if (error < 0) + goto out_tracer; - /* - * we put the context twice: - * once for the ds_get_context - * once for the corresponding ds_request - */ - ds_put_context(context); + return tracer; + + out_tracer: + kfree(tracer); out: - ds_put_context(context); - return error; + return ERR_PTR(error); } -int ds_release_bts(struct task_struct *task) +static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual) { - return ds_release(task, ds_bts); + BUG_ON(tracer->context->owner[qual] != tracer); + tracer->context->owner[qual] = NULL; + + put_tracer(tracer->context->task); + ds_put_context(tracer->context); } -int ds_release_pebs(struct task_struct *task) +int ds_release_bts(struct bts_tracer *tracer) { - return ds_release(task, ds_pebs); + if (!tracer) + return -EINVAL; + + ds_release(&tracer->ds, ds_bts); + kfree(tracer); + + return 0; } -static int ds_get_index(struct task_struct *task, size_t *pos, - enum ds_qualifier qual) +int ds_release_pebs(struct pebs_tracer *tracer) { - struct ds_context *context; - unsigned long base, index; - int error; + if (!tracer) + return -EINVAL; - context = ds_get_context(task); - error = ds_validate_access(context, qual); - if (error < 0) - goto out; + ds_release(&tracer->ds, ds_pebs); + kfree(tracer); + + return 0; +} + +static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual) +{ + unsigned long base, index; base = ds_get(context->ds, qual, ds_buffer_base); index = ds_get(context->ds, qual, ds_index); - error = ((index - base) / ds_cfg.sizeof_rec[qual]); - if (pos) - *pos = error; - out: - ds_put_context(context); - return error; + return (index - base) / ds_cfg.sizeof_rec[qual]; } -int ds_get_bts_index(struct task_struct *task, size_t *pos) +int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos) { - return ds_get_index(task, pos, ds_bts); + if (!tracer) + return -EINVAL; + + if (!pos) + return -EINVAL; + + *pos = ds_get_index(tracer->ds.context, ds_bts); + + return 0; } -int ds_get_pebs_index(struct task_struct *task, size_t *pos) +int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos) { - return ds_get_index(task, pos, ds_pebs); + if (!tracer) + return -EINVAL; + + if (!pos) + return -EINVAL; + + *pos = ds_get_index(tracer->ds.context, ds_pebs); + + return 0; } -static int ds_get_end(struct task_struct *task, size_t *pos, - enum ds_qualifier qual) +static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual) { - struct ds_context *context; - unsigned long base, end; - int error; - - context = ds_get_context(task); - error = ds_validate_access(context, qual); - if (error < 0) - goto out; + unsigned long base, max; base = ds_get(context->ds, qual, ds_buffer_base); - end = ds_get(context->ds, qual, ds_absolute_maximum); + max = ds_get(context->ds, qual, ds_absolute_maximum); - error = ((end - base) / ds_cfg.sizeof_rec[qual]); - if (pos) - *pos = error; - out: - ds_put_context(context); - return error; + return (max - base) / ds_cfg.sizeof_rec[qual]; } -int ds_get_bts_end(struct task_struct *task, size_t *pos) +int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos) { - return ds_get_end(task, pos, ds_bts); + if (!tracer) + return -EINVAL; + + if (!pos) + return -EINVAL; + + *pos = ds_get_end(tracer->ds.context, ds_bts); + + return 0; } -int ds_get_pebs_end(struct task_struct *task, size_t *pos) +int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos) { - return ds_get_end(task, pos, ds_pebs); + if (!tracer) + return -EINVAL; + + if (!pos) + return -EINVAL; + + *pos = ds_get_end(tracer->ds.context, ds_pebs); + + return 0; } -static int ds_access(struct task_struct *task, size_t index, - const void **record, enum ds_qualifier qual) +static int ds_access(struct ds_context *context, enum ds_qualifier qual, + size_t index, const void **record) { - struct ds_context *context; unsigned long base, idx; - int error; if (!record) return -EINVAL; - context = ds_get_context(task); - error = ds_validate_access(context, qual); - if (error < 0) - goto out; - base = ds_get(context->ds, qual, ds_buffer_base); idx = base + (index * ds_cfg.sizeof_rec[qual]); - error = -EINVAL; if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) - goto out; + return -EINVAL; *record = (const void *)idx; - error = ds_cfg.sizeof_rec[qual]; - out: - ds_put_context(context); - return error; + + return ds_cfg.sizeof_rec[qual]; } -int ds_access_bts(struct task_struct *task, size_t index, const void **record) +int ds_access_bts(struct bts_tracer *tracer, size_t index, + const void **record) { - return ds_access(task, index, record, ds_bts); + if (!tracer) + return -EINVAL; + + return ds_access(tracer->ds.context, ds_bts, index, record); } -int ds_access_pebs(struct task_struct *task, size_t index, const void **record) +int ds_access_pebs(struct pebs_tracer *tracer, size_t index, + const void **record) { - return ds_access(task, index, record, ds_pebs); + if (!tracer) + return -EINVAL; + + return ds_access(tracer->ds.context, ds_pebs, index, record); } -static int ds_write(struct task_struct *task, const void *record, size_t size, - enum ds_qualifier qual, int force) +static int ds_write(struct ds_context *context, enum ds_qualifier qual, + const void *record, size_t size) { - struct ds_context *context; - int error; + int bytes_written = 0; if (!record) return -EINVAL; - error = -EPERM; - context = ds_get_context(task); - if (!context) - goto out; - - if (!force) { - error = ds_validate_access(context, qual); - if (error < 0) - goto out; - } - - error = 0; while (size) { unsigned long base, index, end, write_end, int_th; unsigned long write_size, adj_write_size; @@ -678,14 +625,14 @@ static int ds_write(struct task_struct *task, const void *record, size_t size, write_end = end; if (write_end <= index) - goto out; + break; write_size = min((unsigned long) size, write_end - index); memcpy((void *)index, record, write_size); record = (const char *)record + write_size; - size -= write_size; - error += write_size; + size -= write_size; + bytes_written += write_size; adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; adj_write_size *= ds_cfg.sizeof_rec[qual]; @@ -700,47 +647,32 @@ static int ds_write(struct task_struct *task, const void *record, size_t size, ds_set(context->ds, qual, ds_index, index); if (index >= int_th) - ds_overflow(task, context, qual); + ds_overflow(context, qual); } - out: - ds_put_context(context); - return error; + return bytes_written; } -int ds_write_bts(struct task_struct *task, const void *record, size_t size) +int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size) { - return ds_write(task, record, size, ds_bts, /* force = */ 0); -} + if (!tracer) + return -EINVAL; -int ds_write_pebs(struct task_struct *task, const void *record, size_t size) -{ - return ds_write(task, record, size, ds_pebs, /* force = */ 0); + return ds_write(tracer->ds.context, ds_bts, record, size); } -int ds_unchecked_write_bts(struct task_struct *task, - const void *record, size_t size) +int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size) { - return ds_write(task, record, size, ds_bts, /* force = */ 1); -} + if (!tracer) + return -EINVAL; -int ds_unchecked_write_pebs(struct task_struct *task, - const void *record, size_t size) -{ - return ds_write(task, record, size, ds_pebs, /* force = */ 1); + return ds_write(tracer->ds.context, ds_pebs, record, size); } -static int ds_reset_or_clear(struct task_struct *task, - enum ds_qualifier qual, int clear) +static void ds_reset_or_clear(struct ds_context *context, + enum ds_qualifier qual, int clear) { - struct ds_context *context; unsigned long base, end; - int error; - - context = ds_get_context(task); - error = ds_validate_access(context, qual); - if (error < 0) - goto out; base = ds_get(context->ds, qual, ds_buffer_base); end = ds_get(context->ds, qual, ds_absolute_maximum); @@ -749,70 +681,69 @@ static int ds_reset_or_clear(struct task_struct *task, memset((void *)base, 0, end - base); ds_set(context->ds, qual, ds_index, base); - - error = 0; - out: - ds_put_context(context); - return error; } -int ds_reset_bts(struct task_struct *task) +int ds_reset_bts(struct bts_tracer *tracer) { - return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); + if (!tracer) + return -EINVAL; + + ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0); + + return 0; } -int ds_reset_pebs(struct task_struct *task) +int ds_reset_pebs(struct pebs_tracer *tracer) { - return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); + if (!tracer) + return -EINVAL; + + ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0); + + return 0; } -int ds_clear_bts(struct task_struct *task) +int ds_clear_bts(struct bts_tracer *tracer) { - return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); + if (!tracer) + return -EINVAL; + + ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1); + + return 0; } -int ds_clear_pebs(struct task_struct *task) +int ds_clear_pebs(struct pebs_tracer *tracer) { - return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); + if (!tracer) + return -EINVAL; + + ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1); + + return 0; } -int ds_get_pebs_reset(struct task_struct *task, u64 *value) +int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value) { - struct ds_context *context; - int error; + if (!tracer) + return -EINVAL; if (!value) return -EINVAL; - context = ds_get_context(task); - error = ds_validate_access(context, ds_pebs); - if (error < 0) - goto out; + *value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); - *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); - - error = 0; - out: - ds_put_context(context); - return error; + return 0; } -int ds_set_pebs_reset(struct task_struct *task, u64 value) +int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) { - struct ds_context *context; - int error; - - context = ds_get_context(task); - error = ds_validate_access(context, ds_pebs); - if (error < 0) - goto out; + if (!tracer) + return -EINVAL; - *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; + *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; - error = 0; - out: - ds_put_context(context); - return error; + return 0; } static const struct ds_configuration ds_cfg_var = { @@ -840,6 +771,10 @@ static inline void ds_configure(const struct ds_configuration *cfg) { ds_cfg = *cfg; + + printk(KERN_INFO "DS available\n"); + + BUG_ON(MAX_SIZEOF_DS < ds_cfg.sizeof_ds); } void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) @@ -847,17 +782,16 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { + case 0 ... 0xC: + /* sorry, don't know about them */ + break; case 0xD: case 0xE: /* Pentium M */ ds_configure(&ds_cfg_var); break; - case 0xF: /* Core2 */ - case 0x1C: /* Atom */ + default: /* Core2, Atom, ... */ ds_configure(&ds_cfg_64); break; - default: - /* sorry, don't know about them */ - break; } break; case 0xF: @@ -884,6 +818,8 @@ void ds_free(struct ds_context *context) * is dying. There should not be any user of that context left * to disturb us, anymore. */ unsigned long leftovers = context->count; - while (leftovers--) + while (leftovers--) { + put_tracer(context->task); ds_put_context(context); + } } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c new file mode 100644 index 000000000000..6b1f6f6f8661 --- /dev/null +++ b/arch/x86/kernel/dumpstack.c @@ -0,0 +1,351 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + */ +#include <linux/kallsyms.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <linux/utsname.h> +#include <linux/hardirq.h> +#include <linux/kdebug.h> +#include <linux/module.h> +#include <linux/ptrace.h> +#include <linux/kexec.h> +#include <linux/bug.h> +#include <linux/nmi.h> +#include <linux/sysfs.h> + +#include <asm/stacktrace.h> + +#include "dumpstack.h" + +int panic_on_unrecovered_nmi; +unsigned int code_bytes = 64; +int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; +static int die_counter; + +void printk_address(unsigned long address, int reliable) +{ + printk(" [<%p>] %s%pS\n", (void *) address, + reliable ? "" : "? ", (void *) address); +} + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static void +print_ftrace_graph_addr(unsigned long addr, void *data, + const struct stacktrace_ops *ops, + struct thread_info *tinfo, int *graph) +{ + struct task_struct *task = tinfo->task; + unsigned long ret_addr; + int index = task->curr_ret_stack; + + if (addr != (unsigned long)return_to_handler) + return; + + if (!task->ret_stack || index < *graph) + return; + + index -= *graph; + ret_addr = task->ret_stack[index].ret; + + ops->address(data, ret_addr, 1); + + (*graph)++; +} +#else +static inline void +print_ftrace_graph_addr(unsigned long addr, void *data, + const struct stacktrace_ops *ops, + struct thread_info *tinfo, int *graph) +{ } +#endif + +/* + * x86-64 can have up to three kernel stacks: + * process stack + * interrupt stack + * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack + */ + +static inline int valid_stack_ptr(struct thread_info *tinfo, + void *p, unsigned int size, void *end) +{ + void *t = tinfo; + if (end) { + if (p < end && p >= (end-THREAD_SIZE)) + return 1; + else + return 0; + } + return p > t && p < t + THREAD_SIZE - size; +} + +unsigned long +print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph) +{ + struct stack_frame *frame = (struct stack_frame *)bp; + + while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { + unsigned long addr; + + addr = *stack; + if (__kernel_text_address(addr)) { + if ((unsigned long) stack == bp + sizeof(long)) { + ops->address(data, addr, 1); + frame = frame->next_frame; + bp = (unsigned long) frame; + } else { + ops->address(data, addr, bp == 0); + } + print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + } + stack++; + } + return bp; +} + + +static void +print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) +{ + printk(data); + print_symbol(msg, symbol); + printk("\n"); +} + +static void print_trace_warning(void *data, char *msg) +{ + printk("%s%s\n", (char *)data, msg); +} + +static int print_trace_stack(void *data, char *name) +{ + printk("%s <%s> ", (char *)data, name); + return 0; +} + +/* + * Print one address/symbol entries per line. + */ +static void print_trace_address(void *data, unsigned long addr, int reliable) +{ + touch_nmi_watchdog(); + printk(data); + printk_address(addr, reliable); +} + +static const struct stacktrace_ops print_trace_ops = { + .warning = print_trace_warning, + .warning_symbol = print_trace_warning_symbol, + .stack = print_trace_stack, + .address = print_trace_address, +}; + +void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, char *log_lvl) +{ + printk("%sCall Trace:\n", log_lvl); + dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); +} + +void show_trace(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp) +{ + show_trace_log_lvl(task, regs, stack, bp, ""); +} + +void show_stack(struct task_struct *task, unsigned long *sp) +{ + show_stack_log_lvl(task, NULL, sp, 0, ""); +} + +/* + * The architecture-independent dump_stack generator + */ +void dump_stack(void) +{ + unsigned long bp = 0; + unsigned long stack; + +#ifdef CONFIG_FRAME_POINTER + if (!bp) + get_bp(bp); +#endif + + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + show_trace(NULL, NULL, &stack, bp); +} +EXPORT_SYMBOL(dump_stack); + +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; + +unsigned __kprobes long oops_begin(void) +{ + int cpu; + unsigned long flags; + + oops_enter(); + + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!__raw_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + __raw_spin_lock(&die_lock); + } + die_nest_count++; + die_owner = cpu; + console_verbose(); + bust_spinlocks(1); + return flags; +} + +void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) +{ + if (regs && kexec_should_crash(current)) + crash_kexec(regs); + + bust_spinlocks(0); + die_owner = -1; + add_taint(TAINT_DIE); + die_nest_count--; + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); + oops_exit(); + + if (!signr) + return; + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception"); + do_exit(signr); +} + +int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ +#ifdef CONFIG_X86_32 + unsigned short ss; + unsigned long sp; +#endif + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP "); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC"); +#endif + printk("\n"); + sysfs_printk_last_file(); + if (notify_die(DIE_OOPS, str, regs, err, + current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) + return 1; + + show_registers(regs); +#ifdef CONFIG_X86_32 + sp = (unsigned long) (®s->sp); + savesegment(ss, ss); + if (user_mode(regs)) { + sp = regs->sp; + ss = regs->ss & 0xffff; + } + printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); + print_symbol("%s", regs->ip); + printk(" SS:ESP %04x:%08lx\n", ss, sp); +#else + /* Executive summary in case the oops scrolled away */ + printk(KERN_ALERT "RIP "); + printk_address(regs->ip, 1); + printk(" RSP <%016lx>\n", regs->sp); +#endif + return 0; +} + +/* + * This is gone through when something in the kernel has done something bad + * and is about to be terminated: + */ +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(); + int sig = SIGSEGV; + + if (!user_mode_vm(regs)) + report_bug(regs->ip, regs); + + if (__die(str, regs, err)) + sig = 0; + oops_end(flags, regs, sig); +} + +void notrace __kprobes +die_nmi(char *str, struct pt_regs *regs, int do_panic) +{ + unsigned long flags; + + if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; + + /* + * We are in trouble anyway, lets at least try + * to get a message out. + */ + flags = oops_begin(); + printk(KERN_EMERG "%s", str); + printk(" on CPU%d, ip %08lx, registers:\n", + smp_processor_id(), regs->ip); + show_registers(regs); + oops_end(flags, regs, 0); + if (do_panic || panic_on_oops) + panic("Non maskable interrupt"); + nmi_exit(); + local_irq_enable(); + do_exit(SIGBUS); +} + +static int __init oops_setup(char *s) +{ + if (!s) + return -EINVAL; + if (!strcmp(s, "panic")) + panic_on_oops = 1; + return 0; +} +early_param("oops", oops_setup); + +static int __init kstack_setup(char *s) +{ + if (!s) + return -EINVAL; + kstack_depth_to_print = simple_strtoul(s, NULL, 0); + return 0; +} +early_param("kstack", kstack_setup); + +static int __init code_bytes_setup(char *s) +{ + code_bytes = simple_strtoul(s, NULL, 0); + if (code_bytes > 8192) + code_bytes = 8192; + + return 1; +} +__setup("code_bytes=", code_bytes_setup); diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h new file mode 100644 index 000000000000..da87590b8698 --- /dev/null +++ b/arch/x86/kernel/dumpstack.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs + */ + +#ifndef DUMPSTACK_H +#define DUMPSTACK_H + +#ifdef CONFIG_X86_32 +#define STACKSLOTS_PER_LINE 8 +#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) +#else +#define STACKSLOTS_PER_LINE 4 +#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) +#endif + +extern unsigned long +print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long bp, + const struct stacktrace_ops *ops, void *data, + unsigned long *end, int *graph); + +extern void +show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *stack, unsigned long bp, char *log_lvl); + +extern void +show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, + unsigned long *sp, unsigned long bp, char *log_lvl); + +extern unsigned int code_bytes; +extern int kstack_depth_to_print; + +/* The form of the top of the frame on the stack */ +struct stack_frame { + struct stack_frame *next_frame; + unsigned long return_address; +}; +#endif diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index b3614752197b..d593cd1f58dc 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -17,69 +17,14 @@ #include <asm/stacktrace.h> -#define STACKSLOTS_PER_LINE 8 -#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) - -int panic_on_unrecovered_nmi; -int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; -static unsigned int code_bytes = 64; -static int die_counter; - -void printk_address(unsigned long address, int reliable) -{ - printk(" [<%p>] %s%pS\n", (void *) address, - reliable ? "" : "? ", (void *) address); -} - -static inline int valid_stack_ptr(struct thread_info *tinfo, - void *p, unsigned int size, void *end) -{ - void *t = tinfo; - if (end) { - if (p < end && p >= (end-THREAD_SIZE)) - return 1; - else - return 0; - } - return p > t && p < t + THREAD_SIZE - size; -} - -/* The form of the top of the frame on the stack */ -struct stack_frame { - struct stack_frame *next_frame; - unsigned long return_address; -}; - -static inline unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end) -{ - struct stack_frame *frame = (struct stack_frame *)bp; - - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { - unsigned long addr; - - addr = *stack; - if (__kernel_text_address(addr)) { - if ((unsigned long) stack == bp + sizeof(long)) { - ops->address(data, addr, 1); - frame = frame->next_frame; - bp = (unsigned long) frame; - } else { - ops->address(data, addr, bp == 0); - } - } - stack++; - } - return bp; -} +#include "dumpstack.h" void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) { + int graph = 0; + if (!task) task = current; @@ -107,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, context = (struct thread_info *) ((unsigned long)stack & (~(THREAD_SIZE - 1))); - bp = print_context_stack(context, stack, bp, ops, data, NULL); + bp = print_context_stack(context, stack, bp, ops, + data, NULL, &graph); stack = (unsigned long *)context->previous_esp; if (!stack) @@ -119,57 +65,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, } EXPORT_SYMBOL(dump_trace); -static void -print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - printk(data); - print_symbol(msg, symbol); - printk("\n"); -} - -static void print_trace_warning(void *data, char *msg) -{ - printk("%s%s\n", (char *)data, msg); -} - -static int print_trace_stack(void *data, char *name) -{ - printk("%s <%s> ", (char *)data, name); - return 0; -} - -/* - * Print one address/symbol entries per line. - */ -static void print_trace_address(void *data, unsigned long addr, int reliable) -{ - touch_nmi_watchdog(); - printk(data); - printk_address(addr, reliable); -} - -static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, -}; - -static void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) -{ - printk("%sCall Trace:\n", log_lvl); - dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); -} - -void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) -{ - show_trace_log_lvl(task, regs, stack, bp, ""); -} - -static void +void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *sp, unsigned long bp, char *log_lvl) { @@ -196,33 +92,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, show_trace_log_lvl(task, regs, sp, bp, log_lvl); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_log_lvl(task, NULL, sp, 0, ""); -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long bp = 0; - unsigned long stack; - -#ifdef CONFIG_FRAME_POINTER - if (!bp) - get_bp(bp); -#endif - - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); -} - -EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { @@ -283,167 +152,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } -static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; -static int die_owner = -1; -static unsigned int die_nest_count; - -unsigned __kprobes long oops_begin(void) -{ - unsigned long flags; - - oops_enter(); - - if (die_owner != raw_smp_processor_id()) { - console_verbose(); - raw_local_irq_save(flags); - __raw_spin_lock(&die_lock); - die_owner = smp_processor_id(); - die_nest_count = 0; - bust_spinlocks(1); - } else { - raw_local_irq_save(flags); - } - die_nest_count++; - return flags; -} - -void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) -{ - bust_spinlocks(0); - die_owner = -1; - add_taint(TAINT_DIE); - __raw_spin_unlock(&die_lock); - raw_local_irq_restore(flags); - - if (!regs) - return; - - if (kexec_should_crash(current)) - crash_kexec(regs); - if (in_interrupt()) - panic("Fatal exception in interrupt"); - if (panic_on_oops) - panic("Fatal exception"); - oops_exit(); - do_exit(signr); -} - -int __kprobes __die(const char *str, struct pt_regs *regs, long err) -{ - unsigned short ss; - unsigned long sp; - - printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif - printk("\n"); - sysfs_printk_last_file(); - if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) - return 1; - - show_registers(regs); - /* Executive summary in case the oops scrolled away */ - sp = (unsigned long) (®s->sp); - savesegment(ss, ss); - if (user_mode(regs)) { - sp = regs->sp; - ss = regs->ss & 0xffff; - } - printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); - print_symbol("%s", regs->ip); - printk(" SS:ESP %04x:%08lx\n", ss, sp); - return 0; -} - -/* - * This is gone through when something in the kernel has done something bad - * and is about to be terminated: - */ -void die(const char *str, struct pt_regs *regs, long err) -{ - unsigned long flags = oops_begin(); - - if (die_nest_count < 3) { - report_bug(regs->ip, regs); - - if (__die(str, regs, err)) - regs = NULL; - } else { - printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); - } - - oops_end(flags, regs, SIGSEGV); -} - -static DEFINE_SPINLOCK(nmi_print_lock); - -void notrace __kprobes -die_nmi(char *str, struct pt_regs *regs, int do_panic) -{ - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - spin_lock(&nmi_print_lock); - /* - * We are in trouble anyway, lets at least try - * to get a message out: - */ - bust_spinlocks(1); - printk(KERN_EMERG "%s", str); - printk(" on CPU%d, ip %08lx, registers:\n", - smp_processor_id(), regs->ip); - show_registers(regs); - if (do_panic) - panic("Non maskable interrupt"); - console_silent(); - spin_unlock(&nmi_print_lock); - - /* - * If we are in kernel we are probably nested up pretty bad - * and might aswell get out now while we still can: - */ - if (!user_mode_vm(regs)) { - current->thread.trap_no = 2; - crash_kexec(regs); - } - - bust_spinlocks(0); - do_exit(SIGSEGV); -} - -static int __init oops_setup(char *s) -{ - if (!s) - return -EINVAL; - if (!strcmp(s, "panic")) - panic_on_oops = 1; - return 0; -} -early_param("oops", oops_setup); - -static int __init kstack_setup(char *s) -{ - if (!s) - return -EINVAL; - kstack_depth_to_print = simple_strtoul(s, NULL, 0); - return 0; -} -early_param("kstack", kstack_setup); - -static int __init code_bytes_setup(char *s) -{ - code_bytes = simple_strtoul(s, NULL, 0); - if (code_bytes > 8192) - code_bytes = 8192; - - return 1; -} -__setup("code_bytes=", code_bytes_setup); diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 96a5db7da8a7..c302d0707048 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -17,19 +17,7 @@ #include <asm/stacktrace.h> -#define STACKSLOTS_PER_LINE 4 -#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) - -int panic_on_unrecovered_nmi; -int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; -static unsigned int code_bytes = 64; -static int die_counter; - -void printk_address(unsigned long address, int reliable) -{ - printk(" [<%p>] %s%pS\n", (void *) address, - reliable ? "" : "? ", (void *) address); -} +#include "dumpstack.h" static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, unsigned *usedp, char **idp) @@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack */ -static inline int valid_stack_ptr(struct thread_info *tinfo, - void *p, unsigned int size, void *end) -{ - void *t = tinfo; - if (end) { - if (p < end && p >= (end-THREAD_SIZE)) - return 1; - else - return 0; - } - return p > t && p < t + THREAD_SIZE - size; -} - -/* The form of the top of the frame on the stack */ -struct stack_frame { - struct stack_frame *next_frame; - unsigned long return_address; -}; - -static inline unsigned long -print_context_stack(struct thread_info *tinfo, - unsigned long *stack, unsigned long bp, - const struct stacktrace_ops *ops, void *data, - unsigned long *end) -{ - struct stack_frame *frame = (struct stack_frame *)bp; - - while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { - unsigned long addr; - - addr = *stack; - if (__kernel_text_address(addr)) { - if ((unsigned long) stack == bp + sizeof(long)) { - ops->address(data, addr, 1); - frame = frame->next_frame; - bp = (unsigned long) frame; - } else { - ops->address(data, addr, bp == 0); - } - } - stack++; - } - return bp; -} - void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data) @@ -166,6 +109,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; + int graph = 0; if (!task) task = current; @@ -206,7 +150,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, break; bp = print_context_stack(tinfo, stack, bp, ops, - data, estack_end); + data, estack_end, &graph); ops->stack(data, "<EOE>"); /* * We link to the next stack via the @@ -225,7 +169,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (ops->stack(data, "IRQ") < 0) break; bp = print_context_stack(tinfo, stack, bp, - ops, data, irqstack_end); + ops, data, irqstack_end, &graph); /* * We link to the next stack (which would be * the process stack normally) the last @@ -243,62 +187,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, /* * This handles the process stack: */ - bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); + bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); put_cpu(); } EXPORT_SYMBOL(dump_trace); -static void -print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - printk(data); - print_symbol(msg, symbol); - printk("\n"); -} - -static void print_trace_warning(void *data, char *msg) -{ - printk("%s%s\n", (char *)data, msg); -} - -static int print_trace_stack(void *data, char *name) -{ - printk("%s <%s> ", (char *)data, name); - return 0; -} - -/* - * Print one address/symbol entries per line. - */ -static void print_trace_address(void *data, unsigned long addr, int reliable) -{ - touch_nmi_watchdog(); - printk(data); - printk_address(addr, reliable); -} - -static const struct stacktrace_ops print_trace_ops = { - .warning = print_trace_warning, - .warning_symbol = print_trace_warning_symbol, - .stack = print_trace_stack, - .address = print_trace_address, -}; - -static void -show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp, char *log_lvl) -{ - printk("%sCall Trace:\n", log_lvl); - dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); -} - -void show_trace(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, unsigned long bp) -{ - show_trace_log_lvl(task, regs, stack, bp, ""); -} - -static void +void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *sp, unsigned long bp, char *log_lvl) { @@ -342,33 +236,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, show_trace_log_lvl(task, regs, sp, bp, log_lvl); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_log_lvl(task, NULL, sp, 0, ""); -} - -/* - * The architecture-independent dump_stack generator - */ -void dump_stack(void) -{ - unsigned long bp = 0; - unsigned long stack; - -#ifdef CONFIG_FRAME_POINTER - if (!bp) - get_bp(bp); -#endif - - printk("Pid: %d, comm: %.20s %s %s %.*s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - show_trace(NULL, NULL, &stack, bp); -} -EXPORT_SYMBOL(dump_stack); - void show_registers(struct pt_regs *regs) { int i; @@ -429,147 +296,3 @@ int is_valid_bugaddr(unsigned long ip) return ud2 == 0x0b0f; } -static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; -static int die_owner = -1; -static unsigned int die_nest_count; - -unsigned __kprobes long oops_begin(void) -{ - int cpu; - unsigned long flags; - - oops_enter(); - - /* racy, but better than risking deadlock. */ - raw_local_irq_save(flags); - cpu = smp_processor_id(); - if (!__raw_spin_trylock(&die_lock)) { - if (cpu == die_owner) - /* nested oops. should stop eventually */; - else - __raw_spin_lock(&die_lock); - } - die_nest_count++; - die_owner = cpu; - console_verbose(); - bust_spinlocks(1); - return flags; -} - -void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) -{ - die_owner = -1; - bust_spinlocks(0); - die_nest_count--; - if (!die_nest_count) - /* Nest count reaches zero, release the lock. */ - __raw_spin_unlock(&die_lock); - raw_local_irq_restore(flags); - if (!regs) { - oops_exit(); - return; - } - if (in_interrupt()) - panic("Fatal exception in interrupt"); - if (panic_on_oops) - panic("Fatal exception"); - oops_exit(); - do_exit(signr); -} - -int __kprobes __die(const char *str, struct pt_regs *regs, long err) -{ - printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP "); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); -#endif - printk("\n"); - sysfs_printk_last_file(); - if (notify_die(DIE_OOPS, str, regs, err, - current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) - return 1; - - show_registers(regs); - add_taint(TAINT_DIE); - /* Executive summary in case the oops scrolled away */ - printk(KERN_ALERT "RIP "); - printk_address(regs->ip, 1); - printk(" RSP <%016lx>\n", regs->sp); - if (kexec_should_crash(current)) - crash_kexec(regs); - return 0; -} - -void die(const char *str, struct pt_regs *regs, long err) -{ - unsigned long flags = oops_begin(); - - if (!user_mode(regs)) - report_bug(regs->ip, regs); - - if (__die(str, regs, err)) - regs = NULL; - oops_end(flags, regs, SIGSEGV); -} - -notrace __kprobes void -die_nmi(char *str, struct pt_regs *regs, int do_panic) -{ - unsigned long flags; - - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - flags = oops_begin(); - /* - * We are in trouble anyway, lets at least try - * to get a message out. - */ - printk(KERN_EMERG "%s", str); - printk(" on CPU%d, ip %08lx, registers:\n", - smp_processor_id(), regs->ip); - show_registers(regs); - if (kexec_should_crash(current)) - crash_kexec(regs); - if (do_panic || panic_on_oops) - panic("Non maskable interrupt"); - oops_end(flags, NULL, SIGBUS); - nmi_exit(); - local_irq_enable(); - do_exit(SIGBUS); -} - -static int __init oops_setup(char *s) -{ - if (!s) - return -EINVAL; - if (!strcmp(s, "panic")) - panic_on_oops = 1; - return 0; -} -early_param("oops", oops_setup); - -static int __init kstack_setup(char *s) -{ - if (!s) - return -EINVAL; - kstack_depth_to_print = simple_strtoul(s, NULL, 0); - return 0; -} -early_param("kstack", kstack_setup); - -static int __init code_bytes_setup(char *s) -{ - code_bytes = simple_strtoul(s, NULL, 0); - if (code_bytes > 8192) - code_bytes = 8192; - - return 1; -} -__setup("code_bytes=", code_bytes_setup); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 28b597ef9ca1..43ceb3f454bf 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1157,6 +1157,9 @@ ENTRY(mcount) END(mcount) ENTRY(ftrace_caller) + cmpl $0, function_trace_stop + jne ftrace_stub + pushl %eax pushl %ecx pushl %edx @@ -1171,6 +1174,11 @@ ftrace_call: popl %edx popl %ecx popl %eax +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.globl ftrace_graph_call +ftrace_graph_call: + jmp ftrace_stub +#endif .globl ftrace_stub ftrace_stub: @@ -1180,8 +1188,18 @@ END(ftrace_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) + cmpl $0, function_trace_stop + jne ftrace_stub + cmpl $ftrace_stub, ftrace_trace_function jnz trace +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + cmpl $ftrace_stub, ftrace_graph_return + jnz ftrace_graph_caller + + cmpl $ftrace_graph_entry_stub, ftrace_graph_entry + jnz ftrace_graph_caller +#endif .globl ftrace_stub ftrace_stub: ret @@ -1200,12 +1218,43 @@ trace: popl %edx popl %ecx popl %eax - jmp ftrace_stub END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + cmpl $0, function_trace_stop + jne ftrace_stub + + pushl %eax + pushl %ecx + pushl %edx + movl 0xc(%esp), %edx + lea 0x4(%ebp), %eax + subl $MCOUNT_INSN_SIZE, %edx + call prepare_ftrace_return + popl %edx + popl %ecx + popl %eax + ret +END(ftrace_graph_caller) + +.globl return_to_handler +return_to_handler: + pushl $0 + pushl %eax + pushl %ecx + pushl %edx + call ftrace_return_to_handler + movl %eax, 0xc(%esp) + popl %edx + popl %ecx + popl %eax + ret +#endif + .section .rodata,"a" #include "syscall_table_32.S" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b86f332c96a6..54e0bbdccb99 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -68,6 +68,8 @@ ENTRY(mcount) END(mcount) ENTRY(ftrace_caller) + cmpl $0, function_trace_stop + jne ftrace_stub /* taken from glibc */ subq $0x38, %rsp @@ -96,6 +98,12 @@ ftrace_call: movq (%rsp), %rax addq $0x38, %rsp +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.globl ftrace_graph_call +ftrace_graph_call: + jmp ftrace_stub +#endif + .globl ftrace_stub ftrace_stub: retq @@ -103,8 +111,20 @@ END(ftrace_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ ENTRY(mcount) + cmpl $0, function_trace_stop + jne ftrace_stub + cmpq $ftrace_stub, ftrace_trace_function jnz trace + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + cmpq $ftrace_stub, ftrace_graph_return + jnz ftrace_graph_caller + + cmpq $ftrace_graph_entry_stub, ftrace_graph_entry + jnz ftrace_graph_caller +#endif + .globl ftrace_stub ftrace_stub: retq @@ -140,6 +160,69 @@ END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_TRACER */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + cmpl $0, function_trace_stop + jne ftrace_stub + + subq $0x38, %rsp + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + + leaq 8(%rbp), %rdi + movq 0x38(%rsp), %rsi + subq $MCOUNT_INSN_SIZE, %rsi + + call prepare_ftrace_return + + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $0x38, %rsp + retq +END(ftrace_graph_caller) + + +.globl return_to_handler +return_to_handler: + subq $80, %rsp + + movq %rax, (%rsp) + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %rsi, 24(%rsp) + movq %rdi, 32(%rsp) + movq %r8, 40(%rsp) + movq %r9, 48(%rsp) + movq %r10, 56(%rsp) + movq %r11, 64(%rsp) + + call ftrace_return_to_handler + + movq %rax, 72(%rsp) + movq 64(%rsp), %r11 + movq 56(%rsp), %r10 + movq 48(%rsp), %r9 + movq 40(%rsp), %r8 + movq 32(%rsp), %rdi + movq 24(%rsp), %rsi + movq 16(%rsp), %rdx + movq 8(%rsp), %rcx + movq (%rsp), %rax + addq $72, %rsp + retq +#endif + + #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args #endif diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 0aa2c443d600..53699c931ad4 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -38,8 +38,11 @@ #include <asm/io.h> #include <asm/nmi.h> #include <asm/smp.h> +#include <asm/atomic.h> #include <asm/apicdef.h> #include <mach_mpparse.h> +#include <asm/genapic.h> +#include <asm/setup.h> /* * ES7000 chipsets @@ -161,6 +164,43 @@ es7000_rename_gsi(int ioapic, int gsi) return gsi; } +static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) +{ + unsigned long vect = 0, psaival = 0; + + if (psai == NULL) + return -1; + + vect = ((unsigned long)__pa(eip)/0x1000) << 16; + psaival = (0x1000000 | vect | cpu); + + while (*psai & 0x1000000) + ; + + *psai = psaival; + + return 0; +} + +static void noop_wait_for_deassert(atomic_t *deassert_not_used) +{ +} + +static int __init es7000_update_genapic(void) +{ + genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; + + /* MPENTIUMIII */ + if (boot_cpu_data.x86 == 6 && + (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { + es7000_update_genapic_to_cluster(); + genapic->wait_for_init_deassert = noop_wait_for_deassert; + genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; + } + + return 0; +} + void __init setup_unisys(void) { @@ -176,6 +216,8 @@ setup_unisys(void) else es7000_plat = ES7000_CLASSIC; ioapic_renumber_irq = es7000_rename_gsi; + + x86_quirks->update_genapic = es7000_update_genapic; } /* @@ -317,26 +359,6 @@ es7000_mip_write(struct mip_reg *mip_reg) return status; } -int -es7000_start_cpu(int cpu, unsigned long eip) -{ - unsigned long vect = 0, psaival = 0; - - if (psai == NULL) - return -1; - - vect = ((unsigned long)__pa(eip)/0x1000) << 16; - psaival = (0x1000000 | vect | cpu); - - while (*psai & 0x1000000) - ; - - *psai = psaival; - - return 0; - -} - void __init es7000_sw_apic(void) { diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 50ea0ac8c9bf..1b43086b097a 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -14,14 +14,17 @@ #include <linux/uaccess.h> #include <linux/ftrace.h> #include <linux/percpu.h> +#include <linux/sched.h> #include <linux/init.h> #include <linux/list.h> #include <asm/ftrace.h> +#include <linux/ftrace.h> #include <asm/nops.h> +#include <asm/nmi.h> -static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; +#ifdef CONFIG_DYNAMIC_FTRACE union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; @@ -31,18 +34,12 @@ union ftrace_code_union { } __attribute__((packed)); }; - static int ftrace_calc_offset(long ip, long addr) { return (int)(addr - ip); } -unsigned char *ftrace_nop_replace(void) -{ - return ftrace_nop; -} - -unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; @@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) return calc.code; } -int +/* + * Modifying code must take extra care. On an SMP machine, if + * the code being modified is also being executed on another CPU + * that CPU will have undefined results and possibly take a GPF. + * We use kstop_machine to stop other CPUS from exectuing code. + * But this does not stop NMIs from happening. We still need + * to protect against that. We separate out the modification of + * the code to take care of this. + * + * Two buffers are added: An IP buffer and a "code" buffer. + * + * 1) Put the instruction pointer into the IP buffer + * and the new code into the "code" buffer. + * 2) Set a flag that says we are modifying code + * 3) Wait for any running NMIs to finish. + * 4) Write the code + * 5) clear the flag. + * 6) Wait for any running NMIs to finish. + * + * If an NMI is executed, the first thing it does is to call + * "ftrace_nmi_enter". This will check if the flag is set to write + * and if it is, it will write what is in the IP and "code" buffers. + * + * The trick is, it does not matter if everyone is writing the same + * content to the code location. Also, if a CPU is executing code + * it is OK to write to that code location if the contents being written + * are the same as what exists. + */ + +static atomic_t in_nmi = ATOMIC_INIT(0); +static int mod_code_status; /* holds return value of text write */ +static int mod_code_write; /* set when NMI should do the write */ +static void *mod_code_ip; /* holds the IP to write to */ +static void *mod_code_newcode; /* holds the text to write to the IP */ + +static unsigned nmi_wait_count; +static atomic_t nmi_update_count = ATOMIC_INIT(0); + +int ftrace_arch_read_dyn_info(char *buf, int size) +{ + int r; + + r = snprintf(buf, size, "%u %u", + nmi_wait_count, + atomic_read(&nmi_update_count)); + return r; +} + +static void ftrace_mod_code(void) +{ + /* + * Yes, more than one CPU process can be writing to mod_code_status. + * (and the code itself) + * But if one were to fail, then they all should, and if one were + * to succeed, then they all should. + */ + mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, + MCOUNT_INSN_SIZE); +} + +void ftrace_nmi_enter(void) +{ + atomic_inc(&in_nmi); + /* Must have in_nmi seen before reading write flag */ + smp_mb(); + if (mod_code_write) { + ftrace_mod_code(); + atomic_inc(&nmi_update_count); + } +} + +void ftrace_nmi_exit(void) +{ + /* Finish all executions before clearing in_nmi */ + smp_wmb(); + atomic_dec(&in_nmi); +} + +static void wait_for_nmi(void) +{ + int waited = 0; + + while (atomic_read(&in_nmi)) { + waited = 1; + cpu_relax(); + } + + if (waited) + nmi_wait_count++; +} + +static int +do_ftrace_mod_code(unsigned long ip, void *new_code) +{ + mod_code_ip = (void *)ip; + mod_code_newcode = new_code; + + /* The buffers need to be visible before we let NMIs write them */ + smp_wmb(); + + mod_code_write = 1; + + /* Make sure write bit is visible before we wait on NMIs */ + smp_mb(); + + wait_for_nmi(); + + /* Make sure all running NMIs have finished before we write the code */ + smp_mb(); + + ftrace_mod_code(); + + /* Make sure the write happens before clearing the bit */ + smp_wmb(); + + mod_code_write = 0; + + /* make sure NMIs see the cleared bit */ + smp_mb(); + + wait_for_nmi(); + + return mod_code_status; +} + + + + +static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; + +static unsigned char *ftrace_nop_replace(void) +{ + return ftrace_nop; +} + +static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { @@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return -EINVAL; /* replace the text with the new text */ - if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) + if (do_ftrace_mod_code(ip, new_code)) return -EPERM; sync_core(); @@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return 0; } +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *new, *old; + unsigned long ip = rec->ip; + + old = ftrace_call_replace(ip, addr); + new = ftrace_nop_replace(); + + return ftrace_modify_code(rec->ip, old, new); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *new, *old; + unsigned long ip = rec->ip; + + old = ftrace_nop_replace(); + new = ftrace_call_replace(ip, addr); + + return ftrace_modify_code(rec->ip, old, new); +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); @@ -165,3 +320,218 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } +#endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_graph_call(void); + +static int ftrace_mod_jmp(unsigned long ip, + int old_offset, int new_offset) +{ + unsigned char code[MCOUNT_INSN_SIZE]; + + if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + if (code[0] != 0xe9 || old_offset != *(int *)(&code[1])) + return -EINVAL; + + *(int *)(&code[1]) = new_offset; + + if (do_ftrace_mod_code(ip, &code)) + return -EPERM; + + return 0; +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + unsigned long ip = (unsigned long)(&ftrace_graph_call); + int old_offset, new_offset; + + old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); + new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); + + return ftrace_mod_jmp(ip, old_offset, new_offset); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + unsigned long ip = (unsigned long)(&ftrace_graph_call); + int old_offset, new_offset; + + old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); + new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); + + return ftrace_mod_jmp(ip, old_offset, new_offset); +} + +#else /* CONFIG_DYNAMIC_FTRACE */ + +/* + * These functions are picked from those used on + * this page for dynamic ftrace. They have been + * simplified to ignore all traces in NMI context. + */ +static atomic_t in_nmi; + +void ftrace_nmi_enter(void) +{ + atomic_inc(&in_nmi); +} + +void ftrace_nmi_exit(void) +{ + atomic_dec(&in_nmi); +} + +#endif /* !CONFIG_DYNAMIC_FTRACE */ + +/* Add a function return address to the trace stack on thread info.*/ +static int push_return_trace(unsigned long ret, unsigned long long time, + unsigned long func, int *depth) +{ + int index; + + if (!current->ret_stack) + return -EBUSY; + + /* The return trace stack is full */ + if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { + atomic_inc(¤t->trace_overrun); + return -EBUSY; + } + + index = ++current->curr_ret_stack; + barrier(); + current->ret_stack[index].ret = ret; + current->ret_stack[index].func = func; + current->ret_stack[index].calltime = time; + *depth = index; + + return 0; +} + +/* Retrieve a function return address to the trace stack on thread info.*/ +static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) +{ + int index; + + index = current->curr_ret_stack; + + if (unlikely(index < 0)) { + ftrace_graph_stop(); + WARN_ON(1); + /* Might as well panic, otherwise we have no where to go */ + *ret = (unsigned long)panic; + return; + } + + *ret = current->ret_stack[index].ret; + trace->func = current->ret_stack[index].func; + trace->calltime = current->ret_stack[index].calltime; + trace->overrun = atomic_read(¤t->trace_overrun); + trace->depth = index; + barrier(); + current->curr_ret_stack--; + +} + +/* + * Send the trace to the ring-buffer. + * @return the original return address. + */ +unsigned long ftrace_return_to_handler(void) +{ + struct ftrace_graph_ret trace; + unsigned long ret; + + pop_return_trace(&trace, &ret); + trace.rettime = cpu_clock(raw_smp_processor_id()); + ftrace_graph_return(&trace); + + if (unlikely(!ret)) { + ftrace_graph_stop(); + WARN_ON(1); + /* Might as well panic. What else to do? */ + ret = (unsigned long)panic; + } + + return ret; +} + +/* + * Hook the return address and push it in the stack of return addrs + * in current thread info. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) +{ + unsigned long old; + unsigned long long calltime; + int faulted; + struct ftrace_graph_ent trace; + unsigned long return_hooker = (unsigned long) + &return_to_handler; + + /* Nmi's are currently unsupported */ + if (unlikely(atomic_read(&in_nmi))) + return; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + /* + * Protect against fault, even if it shouldn't + * happen. This tool is too much intrusive to + * ignore such a protection. + */ + asm volatile( + "1: " _ASM_MOV " (%[parent_old]), %[old]\n" + "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" + " movl $0, %[faulted]\n" + + ".section .fixup, \"ax\"\n" + "3: movl $1, %[faulted]\n" + ".previous\n" + + _ASM_EXTABLE(1b, 3b) + _ASM_EXTABLE(2b, 3b) + + : [parent_replaced] "=r" (parent), [old] "=r" (old), + [faulted] "=r" (faulted) + : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) + : "memory" + ); + + if (unlikely(faulted)) { + ftrace_graph_stop(); + WARN_ON(1); + return; + } + + if (unlikely(!__kernel_text_address(old))) { + ftrace_graph_stop(); + *parent = old; + WARN_ON(1); + return; + } + + calltime = cpu_clock(raw_smp_processor_id()); + + if (push_return_trace(old, calltime, + self_addr, &trace.depth) == -EBUSY) { + *parent = old; + return; + } + + trace.func = self_addr; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 6c9bfc9e1e95..2bced78b0b8e 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c @@ -21,6 +21,7 @@ #include <asm/smp.h> #include <asm/ipi.h> #include <asm/genapic.h> +#include <asm/setup.h> extern struct genapic apic_flat; extern struct genapic apic_physflat; @@ -53,6 +54,9 @@ void __init setup_apic_routing(void) genapic = &apic_physflat; printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); } + + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); } /* Same for both flat and physical. */ diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 0f4c1fd5a1f4..45e3b69808ba 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -586,26 +586,23 @@ static void __init __get_smp_config(unsigned int early) { struct intel_mp_floating *mpf = mpf_found; - if (x86_quirks->mach_get_smp_config) { - if (x86_quirks->mach_get_smp_config(early)) - return; - } + if (!mpf) + return; + if (acpi_lapic && early) return; + /* - * ACPI supports both logical (e.g. Hyper-Threading) and physical - * processors, where MPS only supports physical. + * MPS doesn't support hyperthreading, aka only have + * thread 0 apic id in MPS table */ - if (acpi_lapic && acpi_ioapic) { - printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " - "information\n"); + if (acpi_lapic && acpi_ioapic) return; - } else if (acpi_lapic) - printk(KERN_INFO "Using ACPI for processor (LAPIC) " - "configuration information\n"); - if (!mpf) - return; + if (x86_quirks->mach_get_smp_config) { + if (x86_quirks->mach_get_smp_config(early)) + return; + } printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 4caff39078e0..0deea37a53cf 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -31,7 +31,7 @@ #include <asm/numaq.h> #include <asm/topology.h> #include <asm/processor.h> -#include <asm/mpspec.h> +#include <asm/genapic.h> #include <asm/e820.h> #include <asm/setup.h> @@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void) return 1; } +static int __init numaq_update_genapic(void) +{ + genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; + + return 0; +} + static struct x86_quirks numaq_x86_quirks __initdata = { .arch_pre_time_init = numaq_pre_time_init, .arch_time_init = NULL, @@ -250,6 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = { .mpc_oem_pci_bus = mpc_oem_pci_bus, .smp_read_mpc_oem = smp_read_mpc_oem, .setup_ioapic_ids = numaq_setup_ioapic_ids, + .update_genapic = numaq_update_genapic, }; void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 0e9f1982b1dd..95777b0faa73 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -7,7 +7,8 @@ #include <asm/paravirt.h> -static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) +static inline void +default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) { __raw_spin_lock(lock); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c622772744d8..95d811a9594f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -7,7 +7,9 @@ #include <linux/module.h> #include <linux/pm.h> #include <linux/clockchips.h> +#include <linux/ftrace.h> #include <asm/system.h> +#include <asm/apic.h> unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -100,6 +102,9 @@ static inline int hlt_use_halt(void) void default_idle(void) { if (hlt_use_halt()) { + struct power_trace it; + + trace_power_start(&it, POWER_CSTATE, 1); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -112,6 +117,7 @@ void default_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; + trace_power_end(&it); } else { local_irq_enable(); /* loop is done by the caller */ @@ -122,6 +128,21 @@ void default_idle(void) EXPORT_SYMBOL(default_idle); #endif +void stop_this_cpu(void *dummy) +{ + local_irq_disable(); + /* + * Remove this CPU: + */ + cpu_clear(smp_processor_id(), cpu_online_map); + disable_local_APIC(); + + for (;;) { + if (hlt_works(smp_processor_id())) + halt(); + } +} + static void do_nothing(void *unused) { } @@ -154,24 +175,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { + struct power_trace it; + + trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); if (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) __mwait(ax, cx); } + trace_power_end(&it); } /* Default MONITOR/MWAIT with no hints, used for default C1 state */ static void mwait_idle(void) { + struct power_trace it; if (!need_resched()) { + trace_power_start(&it, POWER_CSTATE, 1); __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) __sti_mwait(0, 0); else local_irq_enable(); + trace_power_end(&it); } else local_irq_enable(); } @@ -183,9 +211,13 @@ static void mwait_idle(void) */ static void poll_idle(void) { + struct power_trace it; + + trace_power_start(&it, POWER_CSTATE, 0); local_irq_enable(); while (!need_resched()) cpu_relax(); + trace_power_end(&it); } /* diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0a1302fe6d45..24c2276aa453 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -38,6 +38,7 @@ #include <linux/percpu.h> #include <linux/prctl.h> #include <linux/dmi.h> +#include <linux/ftrace.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -548,7 +549,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, * the task-switch, and shows up in ret_from_fork in entry.S, * for example. */ -struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) +__notrace_funcgraph struct task_struct * +__switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c958120fb1b6..fbb321d53d34 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -39,6 +39,7 @@ #include <linux/prctl.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/ftrace.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -551,8 +552,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, * - could test fs/gs bitsliced * * Kprobes not supported here. Set the probe on schedule instead. + * Function graph tracer not supported too. */ -struct task_struct * +__notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a6d8c12e10d..2c8ec1ba75e6 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -668,14 +668,14 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, size_t bts_index, bts_end; int error; - error = ds_get_bts_end(child, &bts_end); + error = ds_get_bts_end(child->bts, &bts_end); if (error < 0) return error; if (bts_end <= index) return -EINVAL; - error = ds_get_bts_index(child, &bts_index); + error = ds_get_bts_index(child->bts, &bts_index); if (error < 0) return error; @@ -684,7 +684,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, if (bts_end <= bts_index) bts_index -= bts_end; - error = ds_access_bts(child, bts_index, &bts_record); + error = ds_access_bts(child->bts, bts_index, &bts_record); if (error < 0) return error; @@ -705,14 +705,14 @@ static int ptrace_bts_drain(struct task_struct *child, size_t end, i; int error; - error = ds_get_bts_index(child, &end); + error = ds_get_bts_index(child->bts, &end); if (error < 0) return error; if (size < (end * sizeof(struct bts_struct))) return -EIO; - error = ds_access_bts(child, 0, (const void **)&raw); + error = ds_access_bts(child->bts, 0, (const void **)&raw); if (error < 0) return error; @@ -723,18 +723,13 @@ static int ptrace_bts_drain(struct task_struct *child, return -EFAULT; } - error = ds_clear_bts(child); + error = ds_clear_bts(child->bts); if (error < 0) return error; return end; } -static void ptrace_bts_ovfl(struct task_struct *child) -{ - send_sig(child->thread.bts_ovfl_signal, child, 0); -} - static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) @@ -760,23 +755,45 @@ static int ptrace_bts_config(struct task_struct *child, goto errout; if (cfg.flags & PTRACE_BTS_O_ALLOC) { - ds_ovfl_callback_t ovfl = NULL; + bts_ovfl_callback_t ovfl = NULL; unsigned int sig = 0; - /* we ignore the error in case we were not tracing child */ - (void)ds_release_bts(child); + error = -EINVAL; + if (cfg.size < (10 * bts_cfg.sizeof_bts)) + goto errout; if (cfg.flags & PTRACE_BTS_O_SIGNAL) { if (!cfg.signal) goto errout; + error = -EOPNOTSUPP; + goto errout; + sig = cfg.signal; - ovfl = ptrace_bts_ovfl; } - error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); - if (error < 0) + if (child->bts) { + (void)ds_release_bts(child->bts); + kfree(child->bts_buffer); + + child->bts = NULL; + child->bts_buffer = NULL; + } + + error = -ENOMEM; + child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL); + if (!child->bts_buffer) + goto errout; + + child->bts = ds_request_bts(child, child->bts_buffer, cfg.size, + ovfl, /* th = */ (size_t)-1); + if (IS_ERR(child->bts)) { + error = PTR_ERR(child->bts); + kfree(child->bts_buffer); + child->bts = NULL; + child->bts_buffer = NULL; goto errout; + } child->thread.bts_ovfl_signal = sig; } @@ -823,15 +840,15 @@ static int ptrace_bts_status(struct task_struct *child, if (cfg_size < sizeof(cfg)) return -EIO; - error = ds_get_bts_end(child, &end); + error = ds_get_bts_end(child->bts, &end); if (error < 0) return error; - error = ds_access_bts(child, /* index = */ 0, &base); + error = ds_access_bts(child->bts, /* index = */ 0, &base); if (error < 0) return error; - error = ds_access_bts(child, /* index = */ end, &max); + error = ds_access_bts(child->bts, /* index = */ end, &max); if (error < 0) return error; @@ -884,10 +901,7 @@ static int ptrace_bts_write_record(struct task_struct *child, return -EINVAL; } - /* The writing task will be the switched-to task on a context - * switch. It needs to write into the switched-from task's BTS - * buffer. */ - return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); + return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts); } void ptrace_bts_take_timestamp(struct task_struct *tsk, @@ -929,17 +943,16 @@ void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { + case 0 ... 0xC: + /* sorry, don't know about them */ + break; case 0xD: case 0xE: /* Pentium M */ bts_configure(&bts_cfg_pentium_m); break; - case 0xF: /* Core2 */ - case 0x1C: /* Atom */ + default: /* Core2, Atom, ... */ bts_configure(&bts_cfg_core2); break; - default: - /* sorry, don't know about them */ - break; } break; case 0xF: @@ -973,13 +986,17 @@ void ptrace_disable(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif #ifdef CONFIG_X86_PTRACE_BTS - (void)ds_release_bts(child); + if (child->bts) { + (void)ds_release_bts(child->bts); + kfree(child->bts_buffer); + child->bts_buffer = NULL; - child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; - if (!child->thread.debugctlmsr) - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); + clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); + } #endif /* CONFIG_X86_PTRACE_BTS */ } @@ -1111,9 +1128,16 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) (child, data, (struct ptrace_bts_config __user *)addr); break; - case PTRACE_BTS_SIZE: - ret = ds_get_bts_index(child, /* pos = */ NULL); + case PTRACE_BTS_SIZE: { + size_t size; + + ret = ds_get_bts_index(child->bts, &size); + if (ret == 0) { + BUG_ON(size != (int) size); + ret = (int) size; + } break; + } case PTRACE_BTS_GET: ret = ptrace_bts_read_record @@ -1121,7 +1145,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_CLEAR: - ret = ds_clear_bts(child); + ret = ds_clear_bts(child->bts); break; case PTRACE_BTS_DRAIN: diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index cc5a2545dd41..0e3dbc7b2bdb 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -36,7 +36,10 @@ int reboot_force; static int reboot_cpu = -1; #endif -/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] +/* This is set by the PCI code if either type 1 or type 2 PCI is detected */ +bool port_cf9_safe = false; + +/* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] warm Don't set the cold reboot flag cold Set the cold reboot flag bios Reboot by jumping through the BIOS (only for X86_32) @@ -45,6 +48,7 @@ static int reboot_cpu = -1; kbd Use the keyboard controller. cold reset (default) acpi Use the RESET_REG in the FADT efi Use efi reset_system runtime service + pci Use the so-called "PCI reset register", CF9 force Avoid anything that could hang. */ static int __init reboot_setup(char *str) @@ -79,6 +83,7 @@ static int __init reboot_setup(char *str) case 'k': case 't': case 'e': + case 'p': reboot_type = *str; break; @@ -404,12 +409,27 @@ static void native_machine_emergency_restart(void) reboot_type = BOOT_KBD; break; - case BOOT_EFI: if (efi_enabled) - efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD, + efi.reset_system(reboot_mode ? + EFI_RESET_WARM : + EFI_RESET_COLD, EFI_SUCCESS, 0, NULL); + reboot_type = BOOT_KBD; + break; + + case BOOT_CF9: + port_cf9_safe = true; + /* fall through */ + case BOOT_CF9_COND: + if (port_cf9_safe) { + u8 cf9 = inb(0xcf9) & ~6; + outb(cf9|2, 0xcf9); /* Request hard reset */ + udelay(50); + outb(cf9|6, 0xcf9); /* Actually do the reset */ + udelay(50); + } reboot_type = BOOT_KBD; break; } @@ -470,6 +490,11 @@ static void native_machine_restart(char *__unused) static void native_machine_halt(void) { + /* stop other cpus and apics */ + machine_shutdown(); + + /* stop this cpu */ + stop_this_cpu(NULL); } static void native_machine_power_off(void) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a3834f123206..b9018955a04f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -583,7 +583,20 @@ static int __init setup_elfcorehdr(char *arg) early_param("elfcorehdr", setup_elfcorehdr); #endif -static struct x86_quirks default_x86_quirks __initdata; +static int __init default_update_genapic(void) +{ +#ifdef CONFIG_X86_SMP +# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) + genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; +# endif +#endif + + return 0; +} + +static struct x86_quirks default_x86_quirks __initdata = { + .update_genapic = default_update_genapic, +}; struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 18f9b19f5f8f..3f92b134ab90 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -140,19 +140,6 @@ void native_send_call_func_ipi(cpumask_t mask) send_IPI_mask(mask, CALL_FUNCTION_VECTOR); } -static void stop_this_cpu(void *dummy) -{ - local_irq_disable(); - /* - * Remove this CPU: - */ - cpu_clear(smp_processor_id(), cpu_online_map); - disable_local_APIC(); - if (hlt_works(smp_processor_id())) - for (;;) halt(); - for (;;); -} - /* * this function calls the 'stop' function on all other CPUs in the system. */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7b1093397319..0e9f446269f4 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -62,6 +62,7 @@ #include <asm/mtrr.h> #include <asm/vmi.h> #include <asm/genapic.h> +#include <asm/setup.h> #include <linux/mc146818rtc.h> #include <mach_apic.h> @@ -536,7 +537,7 @@ static void impress_friends(void) pr_debug("Before bogocount - setting activated=1.\n"); } -static inline void __inquire_remote_apic(int apicid) +void __inquire_remote_apic(int apicid) { unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; char *names[] = { "ID", "VERSION", "SPIV" }; @@ -575,14 +576,13 @@ static inline void __inquire_remote_apic(int apicid) } } -#ifdef WAKE_SECONDARY_VIA_NMI /* * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this * won't ... remember to clear down the APIC, etc later. */ -static int __devinit -wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) +int __devinit +wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) { unsigned long send_status, accept_status = 0; int maxlvt; @@ -599,7 +599,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) * Give the other CPU some time to accept the IPI. */ udelay(200); - if (APIC_INTEGRATED(apic_version[phys_apicid])) { + if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { maxlvt = lapic_get_maxlvt(); if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); @@ -614,11 +614,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) return (send_status | accept_status); } -#endif /* WAKE_SECONDARY_VIA_NMI */ -#ifdef WAKE_SECONDARY_VIA_INIT -static int __devinit -wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) +int __devinit +wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) { unsigned long send_status, accept_status = 0; int maxlvt, num_starts, j; @@ -737,7 +735,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) return (send_status | accept_status); } -#endif /* WAKE_SECONDARY_VIA_INIT */ struct create_idle { struct work_struct work; diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index a03e7f6d90c3..10786af95545 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -6,6 +6,7 @@ #include <linux/sched.h> #include <linux/stacktrace.h> #include <linux/module.h> +#include <linux/uaccess.h> #include <asm/stacktrace.h> static void save_stack_warning(void *data, char *msg) @@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ + +struct stack_frame { + const void __user *next_fp; + unsigned long ret_addr; +}; + +static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) +{ + int ret; + + if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) + return 0; + + ret = 1; + pagefault_disable(); + if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) + ret = 0; + pagefault_enable(); + + return ret; +} + +static inline void __save_stack_trace_user(struct stack_trace *trace) +{ + const struct pt_regs *regs = task_pt_regs(current); + const void __user *fp = (const void __user *)regs->bp; + + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = regs->ip; + + while (trace->nr_entries < trace->max_entries) { + struct stack_frame frame; + + frame.next_fp = NULL; + frame.ret_addr = 0; + if (!copy_stack_frame(fp, &frame)) + break; + if ((unsigned long)fp < regs->sp) + break; + if (frame.ret_addr) { + trace->entries[trace->nr_entries++] = + frame.ret_addr; + } + if (fp == frame.next_fp) + break; + fp = frame.next_fp; + } +} + +void save_stack_trace_user(struct stack_trace *trace) +{ + /* + * Trace user stack if we are not a kernel thread + */ + if (current->mm) { + __save_stack_trace_user(trace); + } + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 0b8b6690a86d..6f3d3d4cd973 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -17,6 +17,9 @@ * want per guest time just set the kernel.vsyscall64 sysctl to 0. */ +/* Disable profiling for userspace code: */ +#define DISABLE_BRANCH_PROFILING + #include <linux/time.h> #include <linux/init.h> #include <linux/kernel.h> diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 9e68075544f6..4a20b2f9a381 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon #define __do_strncpy_from_user(dst, src, count, res) \ do { \ int __d0, __d1, __d2; \ - might_sleep(); \ + might_fault(); \ __asm__ __volatile__( \ " testl %1,%1\n" \ " jz 2f\n" \ @@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user); #define __do_clear_user(addr,size) \ do { \ int __d0; \ - might_sleep(); \ + might_fault(); \ __asm__ __volatile__( \ "0: rep; stosl\n" \ " movl %2,%0\n" \ @@ -155,7 +155,7 @@ do { \ unsigned long clear_user(void __user *to, unsigned long n) { - might_sleep(); + might_fault(); if (access_ok(VERIFY_WRITE, to, n)) __do_clear_user(to, n); return n; @@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n) unsigned long mask = -__addr_ok(s); unsigned long res, tmp; - might_sleep(); + might_fault(); __asm__ __volatile__( " testl %0, %0\n" diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index f4df6e7c718b..64d6c84e6353 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -15,7 +15,7 @@ #define __do_strncpy_from_user(dst,src,count,res) \ do { \ long __d0, __d1, __d2; \ - might_sleep(); \ + might_fault(); \ __asm__ __volatile__( \ " testq %1,%1\n" \ " jz 2f\n" \ @@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user); unsigned long __clear_user(void __user *addr, unsigned long size) { long __d0; - might_sleep(); + might_fault(); /* no memory constraint because it doesn't change any memory gcc knows about */ asm volatile( diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 3c3b471ea496..3624a364b7f3 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -17,6 +17,7 @@ #include <asm/bigsmp/apic.h> #include <asm/bigsmp/ipi.h> #include <asm/mach-default/mach_mpparse.h> +#include <asm/mach-default/mach_wakecpu.h> static int dmi_bigsmp; /* can be set by dmi scanners */ diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 9e835a11a13a..e63a4a76d8cd 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c @@ -16,6 +16,7 @@ #include <asm/mach-default/mach_apic.h> #include <asm/mach-default/mach_ipi.h> #include <asm/mach-default/mach_mpparse.h> +#include <asm/mach-default/mach_wakecpu.h> /* should be called last. */ static int probe_default(void) diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 28459cab3ddb..7b4e6d0d1690 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -16,7 +16,19 @@ #include <asm/es7000/apic.h> #include <asm/es7000/ipi.h> #include <asm/es7000/mpparse.h> -#include <asm/es7000/wakecpu.h> +#include <asm/mach-default/mach_wakecpu.h> + +void __init es7000_update_genapic_to_cluster(void) +{ + genapic->target_cpus = target_cpus_cluster; + genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER; + genapic->int_dest_mode = INT_DEST_MODE_CLUSTER; + genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; + + genapic->init_apic_ldr = init_apic_ldr_cluster; + + genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; +} static int probe_es7000(void) { diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index 5a7e4619e1c4..c346d9d0226f 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c @@ -15,6 +15,7 @@ #include <asm/mpspec.h> #include <asm/apicdef.h> #include <asm/genapic.h> +#include <asm/setup.h> extern struct genapic apic_numaq; extern struct genapic apic_summit; @@ -57,6 +58,9 @@ static int __init parse_apic(char *arg) } } + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); + /* Parsed again by __setup for debug/verbose */ return 0; } @@ -72,12 +76,15 @@ void __init generic_bigsmp_probe(void) * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support */ - if (!cmdline_apic && genapic == &apic_default) + if (!cmdline_apic && genapic == &apic_default) { if (apic_bigsmp.probe()) { genapic = &apic_bigsmp; + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); printk(KERN_INFO "Overriding APIC driver with %s\n", genapic->name); } + } #endif } @@ -94,6 +101,9 @@ void __init generic_apic_probe(void) /* Not visible without early console */ if (!apic_probe[i]) panic("Didn't find an APIC driver"); + + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); } printk(KERN_INFO "Using APIC driver %s\n", genapic->name); } @@ -108,6 +118,8 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem, if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { if (!cmdline_apic) { genapic = apic_probe[i]; + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); printk(KERN_INFO "Switched to APIC driver `%s'.\n", genapic->name); } @@ -124,6 +136,8 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { if (!cmdline_apic) { genapic = apic_probe[i]; + if (x86_quirks->update_genapic) + x86_quirks->update_genapic(); printk(KERN_INFO "Switched to APIC driver `%s'.\n", genapic->name); } diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 6272b5e69da6..2c6d234e0009 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -16,6 +16,7 @@ #include <asm/summit/apic.h> #include <asm/summit/ipi.h> #include <asm/summit/mpparse.h> +#include <asm/mach-default/mach_wakecpu.h> static int probe_summit(void) { diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index fea4565ff576..d8cc96a2738f 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o obj-$(CONFIG_HIGHMEM) += highmem_32.o -obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o obj-$(CONFIG_MMIOTRACE) += mmiotrace.o -mmiotrace-y := pf_in.o mmio-mod.o +mmiotrace-y := kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o obj-$(CONFIG_NUMA) += numa_$(BITS).o diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 31e8730fa246..21e996a70d68 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -53,7 +53,7 @@ static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) { -#ifdef CONFIG_MMIOTRACE_HOOKS +#ifdef CONFIG_MMIOTRACE if (unlikely(is_kmmio_active())) if (kmmio_handler(regs, addr) == 1) return -1; @@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, unsigned long error_code) { unsigned long flags = oops_begin(); + int sig = SIGKILL; struct task_struct *tsk; printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", @@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, tsk->thread.trap_no = 14; tsk->thread.error_code = error_code; if (__die("Bad pagetable", regs, error_code)) - regs = NULL; - oops_end(flags, regs, SIGKILL); + sig = 0; + oops_end(flags, regs, sig); } #endif @@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) int fault; #ifdef CONFIG_X86_64 unsigned long flags; + int sig; #endif tsk = current; @@ -849,11 +851,12 @@ no_context: bust_spinlocks(0); do_exit(SIGKILL); #else + sig = SIGKILL; if (__die("Oops", regs, error_code)) - regs = NULL; + sig = 0; /* Executive summary in case the body of the oops scrolled away */ printk(KERN_EMERG "CR2: %016lx\n", address); - oops_end(flags, regs, SIGKILL); + oops_end(flags, regs, sig); #endif /* diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 9915293500fb..9a5af6c8fbe9 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c @@ -173,7 +173,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, #undef PCI_CONF2_ADDRESS -static struct pci_raw_ops pci_direct_conf2 = { +struct pci_raw_ops pci_direct_conf2 = { .read = pci_conf2_read, .write = pci_conf2_write, }; @@ -289,6 +289,7 @@ int __init pci_direct_probe(void) if (pci_check_type1()) { raw_pci_ops = &pci_direct_conf1; + port_cf9_safe = true; return 1; } release_resource(region); @@ -305,6 +306,7 @@ int __init pci_direct_probe(void) if (pci_check_type2()) { raw_pci_ops = &pci_direct_conf2; + port_cf9_safe = true; return 2; } diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index 15b9cf6be729..1959018aac02 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h @@ -96,6 +96,7 @@ extern struct pci_raw_ops *raw_pci_ops; extern struct pci_raw_ops *raw_pci_ext_ops; extern struct pci_raw_ops pci_direct_conf1; +extern bool port_cf9_safe; /* arch_initcall level */ extern int pci_direct_probe(void); diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 1ef0f90813d6..d9d35824c56f 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -9,6 +9,9 @@ * Also alternative() doesn't work. */ +/* Disable profiling for userspace code: */ +#define DISABLE_BRANCH_PROFILING + #include <linux/kernel.h> #include <linux/posix-timers.h> #include <linux/time.h> |