From 771dae81896855d25f7f8746aaf56c0238deafb6 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Wed, 30 Nov 2011 02:46:31 +0000 Subject: powerpc/cpuidle: Add cpu_idle_wait() to allow switching of idle routines This patch provides cpu_idle_wait() routine for the powerpc platform which is required by the cpuidle subsystem. This routine is required to change the idle handler on SMP systems. The equivalent routine for x86 is in arch/x86/kernel/process.c but the powerpc implementation is different. cpuidle_disable variable is to enable/disable cpuidle framework if power_save option is set during the boot time. Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Signed-off-by: Arun R Bharadwaj Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/system.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include/asm/system.h') diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index e30a13d1ee76..ff6668038799 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -221,6 +221,7 @@ extern unsigned long klimit; extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); extern int powersave_nap; /* set if nap mode can be used in idle loop */ +void cpu_idle_wait(void); /* * Atomic exchange -- cgit From 707827f3387d9b260d50fa697885a4042cea3bf4 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Wed, 30 Nov 2011 02:46:42 +0000 Subject: powerpc/cpuidle: cpuidle driver for pSeries This patch implements a back-end cpuidle driver for pSeries based on pseries_dedicated_idle_loop and pseries_shared_idle_loop routines. The driver is built only if CONFIG_CPU_IDLE is set. This cpuidle driver uses global registration of idle states and not per-cpu. Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Signed-off-by: Arun R Bharadwaj Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/system.h | 8 + arch/powerpc/kernel/sysfs.c | 2 + arch/powerpc/platforms/pseries/Kconfig | 9 + arch/powerpc/platforms/pseries/Makefile | 1 + arch/powerpc/platforms/pseries/processor_idle.c | 326 ++++++++++++++++++++++++ arch/powerpc/platforms/pseries/pseries.h | 3 + arch/powerpc/platforms/pseries/setup.c | 3 - arch/powerpc/platforms/pseries/smp.c | 1 + 8 files changed, 350 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/platforms/pseries/processor_idle.c (limited to 'arch/powerpc/include/asm/system.h') diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index ff6668038799..f56a0a75d989 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -223,6 +223,14 @@ extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); extern int powersave_nap; /* set if nap mode can be used in idle loop */ void cpu_idle_wait(void); +#ifdef CONFIG_PSERIES_IDLE +extern void update_smt_snooze_delay(int snooze); +extern int pseries_notify_cpuidle_add_cpu(int cpu); +#else +static inline void update_smt_snooze_delay(int snooze) {} +static inline int pseries_notify_cpuidle_add_cpu(int cpu) { return 0; } +#endif + /* * Atomic exchange * diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index f579be552094..6fdf5ffe8c44 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "cacheinfo.h" @@ -51,6 +52,7 @@ static ssize_t store_smt_snooze_delay(struct sys_device *dev, return -EINVAL; per_cpu(smt_snooze_delay, cpu->sysdev.id) = snooze; + update_smt_snooze_delay(snooze); return count; } diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index c81f6bb9c10f..ae7b6d41fed3 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -120,3 +120,12 @@ config DTL which are accessible through a debugfs file. Say N if you are unsure. + +config PSERIES_IDLE + tristate "Cpuidle driver for pSeries platforms" + depends on CPU_IDLE + depends on PPC_PSERIES + default y + help + Select this option to enable processor idle state management + through cpuidle subsystem. diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 3556e402cbf5..236db46b4078 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DTL) += dtl.o obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o +obj-$(CONFIG_PSERIES_IDLE) += processor_idle.o ifeq ($(CONFIG_PPC_PSERIES),y) obj-$(CONFIG_SUSPEND) += suspend.o diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c new file mode 100644 index 000000000000..f7e3e877cb69 --- /dev/null +++ b/arch/powerpc/platforms/pseries/processor_idle.c @@ -0,0 +1,326 @@ +/* + * processor_idle - idle state cpuidle driver. + * Adapted from drivers/idle/intel_idle.c and + * drivers/acpi/processor_idle.c + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "plpar_wrappers.h" +#include "pseries.h" + +struct cpuidle_driver pseries_idle_driver = { + .name = "pseries_idle", + .owner = THIS_MODULE, +}; + +#define MAX_IDLE_STATE_COUNT 2 + +static int max_idle_state = MAX_IDLE_STATE_COUNT - 1; +static struct cpuidle_device __percpu *pseries_cpuidle_devices; +static struct cpuidle_state *cpuidle_state_table; + +void update_smt_snooze_delay(int snooze) +{ + struct cpuidle_driver *drv = cpuidle_get_driver(); + if (drv) + drv->states[0].target_residency = snooze; +} + +static inline void idle_loop_prolog(unsigned long *in_purr, ktime_t *kt_before) +{ + + *kt_before = ktime_get_real(); + *in_purr = mfspr(SPRN_PURR); + /* + * Indicate to the HV that we are idle. Now would be + * a good time to find other work to dispatch. + */ + get_lppaca()->idle = 1; +} + +static inline s64 idle_loop_epilog(unsigned long in_purr, ktime_t kt_before) +{ + get_lppaca()->wait_state_cycles += mfspr(SPRN_PURR) - in_purr; + get_lppaca()->idle = 0; + + return ktime_to_us(ktime_sub(ktime_get_real(), kt_before)); +} + +static int snooze_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long in_purr; + ktime_t kt_before; + unsigned long start_snooze; + long snooze = drv->states[0].target_residency; + + idle_loop_prolog(&in_purr, &kt_before); + + if (snooze) { + start_snooze = get_tb() + snooze * tb_ticks_per_usec; + local_irq_enable(); + set_thread_flag(TIF_POLLING_NRFLAG); + + while ((snooze < 0) || (get_tb() < start_snooze)) { + if (need_resched() || cpu_is_offline(dev->cpu)) + goto out; + ppc64_runlatch_off(); + HMT_low(); + HMT_very_low(); + } + + HMT_medium(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb(); + local_irq_disable(); + } + +out: + HMT_medium(); + dev->last_residency = + (int)idle_loop_epilog(in_purr, kt_before); + return index; +} + +static int dedicated_cede_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long in_purr; + ktime_t kt_before; + + idle_loop_prolog(&in_purr, &kt_before); + get_lppaca()->donate_dedicated_cpu = 1; + + ppc64_runlatch_off(); + HMT_medium(); + cede_processor(); + + get_lppaca()->donate_dedicated_cpu = 0; + dev->last_residency = + (int)idle_loop_epilog(in_purr, kt_before); + return index; +} + +static int shared_cede_loop(struct cpuidle_device *dev, + struct cpuidle_driver *drv, + int index) +{ + unsigned long in_purr; + ktime_t kt_before; + + idle_loop_prolog(&in_purr, &kt_before); + + /* + * Yield the processor to the hypervisor. We return if + * an external interrupt occurs (which are driven prior + * to returning here) or if a prod occurs from another + * processor. When returning here, external interrupts + * are enabled. + */ + cede_processor(); + + dev->last_residency = + (int)idle_loop_epilog(in_purr, kt_before); + return index; +} + +/* + * States for dedicated partition case. + */ +static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = { + { /* Snooze */ + .name = "snooze", + .desc = "snooze", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 0, + .target_residency = 0, + .enter = &snooze_loop }, + { /* CEDE */ + .name = "CEDE", + .desc = "CEDE", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 1, + .target_residency = 10, + .enter = &dedicated_cede_loop }, +}; + +/* + * States for shared partition case. + */ +static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = { + { /* Shared Cede */ + .name = "Shared Cede", + .desc = "Shared Cede", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 0, + .target_residency = 0, + .enter = &shared_cede_loop }, +}; + +int pseries_notify_cpuidle_add_cpu(int cpu) +{ + struct cpuidle_device *dev = + per_cpu_ptr(pseries_cpuidle_devices, cpu); + if (dev && cpuidle_get_driver()) { + cpuidle_disable_device(dev); + cpuidle_enable_device(dev); + } + return 0; +} + +/* + * pseries_cpuidle_driver_init() + */ +static int pseries_cpuidle_driver_init(void) +{ + int idle_state; + struct cpuidle_driver *drv = &pseries_idle_driver; + + drv->state_count = 0; + + for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) { + + if (idle_state > max_idle_state) + break; + + /* is the state not enabled? */ + if (cpuidle_state_table[idle_state].enter == NULL) + continue; + + drv->states[drv->state_count] = /* structure copy */ + cpuidle_state_table[idle_state]; + + if (cpuidle_state_table == dedicated_states) + drv->states[drv->state_count].target_residency = + __get_cpu_var(smt_snooze_delay); + + drv->state_count += 1; + } + + return 0; +} + +/* pseries_idle_devices_uninit(void) + * unregister cpuidle devices and de-allocate memory + */ +static void pseries_idle_devices_uninit(void) +{ + int i; + struct cpuidle_device *dev; + + for_each_possible_cpu(i) { + dev = per_cpu_ptr(pseries_cpuidle_devices, i); + cpuidle_unregister_device(dev); + } + + free_percpu(pseries_cpuidle_devices); + return; +} + +/* pseries_idle_devices_init() + * allocate, initialize and register cpuidle device + */ +static int pseries_idle_devices_init(void) +{ + int i; + struct cpuidle_driver *drv = &pseries_idle_driver; + struct cpuidle_device *dev; + + pseries_cpuidle_devices = alloc_percpu(struct cpuidle_device); + if (pseries_cpuidle_devices == NULL) + return -ENOMEM; + + for_each_possible_cpu(i) { + dev = per_cpu_ptr(pseries_cpuidle_devices, i); + dev->state_count = drv->state_count; + dev->cpu = i; + if (cpuidle_register_device(dev)) { + printk(KERN_DEBUG \ + "cpuidle_register_device %d failed!\n", i); + return -EIO; + } + } + + return 0; +} + +/* + * pseries_idle_probe() + * Choose state table for shared versus dedicated partition + */ +static int pseries_idle_probe(void) +{ + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return -ENODEV; + + if (max_idle_state == 0) { + printk(KERN_DEBUG "pseries processor idle disabled.\n"); + return -EPERM; + } + + if (get_lppaca()->shared_proc) + cpuidle_state_table = shared_states; + else + cpuidle_state_table = dedicated_states; + + return 0; +} + +static int __init pseries_processor_idle_init(void) +{ + int retval; + + retval = pseries_idle_probe(); + if (retval) + return retval; + + pseries_cpuidle_driver_init(); + retval = cpuidle_register_driver(&pseries_idle_driver); + if (retval) { + printk(KERN_DEBUG "Registration of pseries driver failed.\n"); + return retval; + } + + retval = pseries_idle_devices_init(); + if (retval) { + pseries_idle_devices_uninit(); + cpuidle_unregister_driver(&pseries_idle_driver); + return retval; + } + + printk(KERN_DEBUG "pseries_idle_driver registered\n"); + + return 0; +} + +static void __exit pseries_processor_idle_exit(void) +{ + + pseries_idle_devices_uninit(); + cpuidle_unregister_driver(&pseries_idle_driver); + + return; +} + +module_init(pseries_processor_idle_init); +module_exit(pseries_processor_idle_exit); + +MODULE_AUTHOR("Deepthi Dharwar "); +MODULE_DESCRIPTION("Cpuidle driver for POWER"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 24c7162f11d9..9a3dda07566f 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -57,4 +57,7 @@ extern struct device_node *dlpar_configure_connector(u32); extern int dlpar_attach_node(struct device_node *); extern int dlpar_detach_node(struct device_node *); +/* Snooze Delay, pseries_idle */ +DECLARE_PER_CPU(long, smt_snooze_delay); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 01df08dbc43c..f2446da7f2d5 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -585,9 +585,6 @@ static int __init pSeries_probe(void) return 1; } - -DECLARE_PER_CPU(long, smt_snooze_delay); - static void pseries_dedicated_idle_sleep(void) { unsigned int cpu = smp_processor_id(); diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 26e93fd4c62b..bbc3c42f6730 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -148,6 +148,7 @@ static void __devinit smp_xics_setup_cpu(int cpu) set_cpu_current_state(cpu, CPU_STATE_ONLINE); set_default_offline_state(cpu); #endif + pseries_notify_cpuidle_add_cpu(cpu); } static int __devinit smp_pSeries_kick_cpu(int nr) -- cgit From 760ca4dc90e624eb8f7ff85a5925151e25577758 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 30 Nov 2011 00:23:13 +0000 Subject: powerpc: Rework die() Our die() code was based off a very old x86 version. Update it to mirror the current x86 code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/system.h | 2 +- arch/powerpc/kernel/traps.c | 128 ++++++++++++++++++++++---------------- 2 files changed, 74 insertions(+), 56 deletions(-) (limited to 'arch/powerpc/include/asm/system.h') diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index e30a13d1ee76..d51c2c20dd23 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -193,8 +193,8 @@ extern void cacheable_memzero(void *p, unsigned int nb); extern void *cacheable_memcpy(void *, const void *, unsigned int); extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); extern void bad_page_fault(struct pt_regs *, unsigned long, int); -extern int die(const char *, struct pt_regs *, long); extern void _exception(int, struct pt_regs *, int, unsigned long); +extern void die(const char *, struct pt_regs *, long); extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val); #ifdef CONFIG_BOOKE_WDT diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 014f88f03d3f..c091527efd89 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -98,18 +98,14 @@ static void pmac_backlight_unblank(void) static inline void pmac_backlight_unblank(void) { } #endif -int die(const char *str, struct pt_regs *regs, long err) +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; +static int die_counter; + +static unsigned __kprobes long oops_begin(struct pt_regs *regs) { - static struct { - raw_spinlock_t lock; - u32 lock_owner; - int lock_owner_depth; - } die = { - .lock = __RAW_SPIN_LOCK_UNLOCKED(die.lock), - .lock_owner = -1, - .lock_owner_depth = 0 - }; - static int die_counter; + int cpu; unsigned long flags; if (debugger(regs)) @@ -117,50 +113,37 @@ int die(const char *str, struct pt_regs *regs, long err) oops_enter(); - if (die.lock_owner != raw_smp_processor_id()) { - console_verbose(); - raw_spin_lock_irqsave(&die.lock, flags); - die.lock_owner = smp_processor_id(); - die.lock_owner_depth = 0; - bust_spinlocks(1); - if (machine_is(powermac)) - pmac_backlight_unblank(); - } else { - local_save_flags(flags); - } - - if (++die.lock_owner_depth < 3) { - printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); -#ifdef CONFIG_PREEMPT - printk("PREEMPT "); -#endif -#ifdef CONFIG_SMP - printk("SMP NR_CPUS=%d ", NR_CPUS); -#endif -#ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC "); -#endif -#ifdef CONFIG_NUMA - printk("NUMA "); -#endif - printk("%s\n", ppc_md.name ? ppc_md.name : ""); - - if (notify_die(DIE_OOPS, str, regs, err, 255, - SIGSEGV) == NOTIFY_STOP) - return 1; - - print_modules(); - show_regs(regs); - } else { - printk("Recursive die() failure, output suppressed\n"); + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!arch_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + arch_spin_lock(&die_lock); } + die_nest_count++; + die_owner = cpu; + console_verbose(); + bust_spinlocks(1); + if (machine_is(powermac)) + pmac_backlight_unblank(); + return flags; +} +static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, + int signr) +{ bust_spinlocks(0); - die.lock_owner = -1; + die_owner = -1; add_taint(TAINT_DIE); + die_nest_count--; oops_exit(); printk("\n"); - raw_spin_unlock_irqrestore(&die.lock, flags); + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + arch_spin_unlock(&die_lock); + raw_local_irq_restore(flags); /* * A system reset (0x100) is a request to dump, so we always send @@ -177,6 +160,9 @@ int die(const char *str, struct pt_regs *regs, long err) crash_kexec_secondary(regs); } + if (!signr) + return; + /* * While our oops output is serialised by a spinlock, output * from panic() called below can race and corrupt it. If we @@ -190,15 +176,46 @@ int die(const char *str, struct pt_regs *regs, long err) if (in_interrupt()) panic("Fatal exception in interrupt"); - if (panic_on_oops) panic("Fatal exception"); + do_exit(signr); +} - do_exit(err); +static int __kprobes __die(const char *str, struct pt_regs *regs, long err) +{ + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC "); +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); +#endif + printk("%s\n", ppc_md.name ? ppc_md.name : ""); + + if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP) + return 1; + + print_modules(); + show_regs(regs); return 0; } +void die(const char *str, struct pt_regs *regs, long err) +{ + unsigned long flags = oops_begin(regs); + + if (__die(str, regs, err)) + err = 0; + oops_end(flags, regs, err); +} + void user_single_step_siginfo(struct task_struct *tsk, struct pt_regs *regs, siginfo_t *info) { @@ -217,10 +234,11 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) "at %016lx nip %016lx lr %016lx code %x\n"; if (!user_mode(regs)) { - if (die("Exception in kernel mode", regs, signr)) - return; - } else if (show_unhandled_signals && - unhandled_signal(current, signr)) { + die("Exception in kernel mode", regs, signr); + return; + } + + if (show_unhandled_signals && unhandled_signal(current, signr)) { printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, signr, addr, regs->nip, regs->link, code); -- cgit