diff options
Diffstat (limited to 'arch/powerpc/kernel')
77 files changed, 1233 insertions, 705 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 0e3640e14eb1..4d7829399570 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -11,6 +11,7 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif +CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) @@ -47,7 +48,7 @@ obj-y := cputable.o syscalls.o \ udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o firmware.o \ hw_breakpoint_constraints.o interrupt.o \ - kdebugfs.o + kdebugfs.o stacktrace.o obj-y += ptrace/ obj-$(CONFIG_PPC64) += setup_64.o \ paca.o nvram_64.o note.o @@ -116,7 +117,6 @@ obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o -obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o @@ -196,3 +196,6 @@ clean-files := vmlinux.lds # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32/vdso32.so.dbg $(obj)/vdso64_wrapper.o : $(obj)/vdso64/vdso64.so.dbg + +# for cleaning +subdir- += vdso32 vdso64 diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index bf96b954a4eb..3e37ece06739 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -105,7 +105,7 @@ static struct aligninfo spe_aligninfo[32] = { * so we don't need the address swizzling. */ static int emulate_spe(struct pt_regs *regs, unsigned int reg, - struct ppc_inst ppc_instr) + ppc_inst_t ppc_instr) { union { u64 ll; @@ -300,7 +300,7 @@ Efault_write: int fix_alignment(struct pt_regs *regs) { - struct ppc_inst instr; + ppc_inst_t instr; struct instruction_op op; int r, type; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index cc05522f50bf..7582f3e3a330 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -54,7 +54,7 @@ #endif #ifdef CONFIG_PPC32 -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x #include "head_booke.h" #endif #endif @@ -139,6 +139,7 @@ int main(void) OFFSET(THR11, thread_struct, r11); OFFSET(THLR, thread_struct, lr); OFFSET(THCTR, thread_struct, ctr); + OFFSET(THSR0, thread_struct, sr0); #endif #ifdef CONFIG_SPE OFFSET(THREAD_EVR0, thread_struct, evr[0]); @@ -218,10 +219,12 @@ int main(void) OFFSET(PACA_EXGEN, paca_struct, exgen); OFFSET(PACA_EXMC, paca_struct, exmc); OFFSET(PACA_EXNMI, paca_struct, exnmi); +#ifdef CONFIG_PPC_64S_HASH_MMU OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr); OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid); OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid); OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area); +#endif OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 803c2a45b22a..9d9d56b574cc 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -161,7 +161,7 @@ void btext_map(void) boot_text_mapped = 1; } -static int btext_initialize(struct device_node *np) +static int __init btext_initialize(struct device_node *np) { unsigned int width, height, depth, pitch; unsigned long address = 0; @@ -241,8 +241,10 @@ int __init btext_find_display(int allow_nonstdout) rc = btext_initialize(np); printk("result: %d\n", rc); } - if (rc == 0) + if (rc == 0) { + of_node_put(np); break; + } } return rc; } @@ -290,7 +292,7 @@ void btext_update_display(unsigned long phys, int width, int height, } EXPORT_SYMBOL(btext_update_display); -void btext_clearscreen(void) +void __init btext_clearscreen(void) { unsigned int *base = (unsigned int *)calc_base(0, 0); unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * @@ -308,7 +310,7 @@ void btext_clearscreen(void) rmci_maybe_off(); } -void btext_flushscreen(void) +void __init btext_flushscreen(void) { unsigned int *base = (unsigned int *)calc_base(0, 0); unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * @@ -327,7 +329,7 @@ void btext_flushscreen(void) __asm__ __volatile__ ("sync" ::: "memory"); } -void btext_flushline(void) +void __init btext_flushline(void) { unsigned int *base = (unsigned int *)calc_base(0, g_loc_Y << 4); unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) * @@ -542,7 +544,7 @@ void btext_drawstring(const char *c) btext_drawchar(*c++); } -void btext_drawtext(const char *c, unsigned int len) +void __init btext_drawtext(const char *c, unsigned int len) { if (!boot_text_mapped) return; @@ -550,7 +552,7 @@ void btext_drawtext(const char *c, unsigned int len) btext_drawchar(*c++); } -void btext_drawhex(unsigned long v) +void __init btext_drawhex(unsigned long v) { if (!boot_text_mapped) return; diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index cf1be75b7833..00b0992be3e7 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -710,7 +710,7 @@ static struct kobj_attribute cache_shared_cpu_list_attr = __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); /* Attributes which should always be created -- the kobject/sysfs core - * does this automatically via kobj_type->default_attrs. This is the + * does this automatically via kobj_type->default_groups. This is the * minimum data required to uniquely identify a cache. */ static struct attribute *cache_index_default_attrs[] = { @@ -720,6 +720,7 @@ static struct attribute *cache_index_default_attrs[] = { &cache_shared_cpu_list_attr.attr, NULL, }; +ATTRIBUTE_GROUPS(cache_index_default); /* Attributes which should be created if the cache device node has the * right properties -- see cacheinfo_create_index_opt_attrs @@ -738,7 +739,7 @@ static const struct sysfs_ops cache_index_ops = { static struct kobj_type cache_index_type = { .release = cache_index_release, .sysfs_ops = &cache_index_ops, - .default_attrs = cache_index_default_attrs, + .default_groups = cache_index_default_groups, }; static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c index 3cca88ee96d7..3dc61e203f37 100644 --- a/arch/powerpc/kernel/cpu_setup_power.c +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void) static void init_PMU(void) { mtspr(SPRN_MMCRA, 0); - mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR0, MMCR0_FC); mtspr(SPRN_MMCR1, 0); mtspr(SPRN_MMCR2, 0); } @@ -123,7 +123,7 @@ static void init_PMU_ISA31(void) { mtspr(SPRN_MMCR3, 0); mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); - mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); + mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT); } /* @@ -137,6 +137,7 @@ void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t) return; mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); } @@ -150,6 +151,7 @@ void __restore_cpu_power7(void) return; mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); } @@ -164,6 +166,7 @@ void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t) return; mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ init_HFSCR(); @@ -184,6 +187,7 @@ void __restore_cpu_power8(void) return; mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ init_HFSCR(); @@ -202,6 +206,7 @@ void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t) mtspr(SPRN_PSSCR, 0); mtspr(SPRN_LPID, 0); mtspr(SPRN_PID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); @@ -223,6 +228,7 @@ void __restore_cpu_power9(void) mtspr(SPRN_PSSCR, 0); mtspr(SPRN_LPID, 0); mtspr(SPRN_PID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); @@ -242,6 +248,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t) mtspr(SPRN_PSSCR, 0); mtspr(SPRN_LPID, 0); mtspr(SPRN_PID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); @@ -264,6 +271,7 @@ void __restore_cpu_power10(void) mtspr(SPRN_PSSCR, 0); mtspr(SPRN_LPID, 0); mtspr(SPRN_PID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_PCR, PCR_MASK); init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 5545c9cd17c1..f55c6fb34a3a 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -27,7 +27,8 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) ppc_msgsync(); - may_hard_irq_enable(); + if (should_hard_irq_enable()) + do_hard_irq_enable(); kvmppc_clear_host_ipi(smp_processor_id()); __this_cpu_inc(irq_stat.doorbell_irqs); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index ba527fb52993..7d1b2c4a4891 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -80,6 +80,7 @@ static void __restore_cpu_cpufeatures(void) mtspr(SPRN_LPCR, system_registers.lpcr); if (hv_mode) { mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); mtspr(SPRN_HFSCR, system_registers.hfscr); mtspr(SPRN_PCR, system_registers.pcr); } @@ -216,6 +217,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f) } mtspr(SPRN_LPID, 0); + mtspr(SPRN_AMOR, ~0); lpcr = mfspr(SPRN_LPCR); lpcr &= ~LPCR_LPES0; /* HV external interrupts */ @@ -271,6 +273,9 @@ static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f) { u64 lpcr; + if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) + return 0; + lpcr = mfspr(SPRN_LPCR); lpcr &= ~LPCR_ISL; @@ -290,6 +295,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f) { u64 lpcr; + if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) + return 0; + lpcr = mfspr(SPRN_LPCR); lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR); mtspr(SPRN_LPCR, lpcr); @@ -303,15 +311,15 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f) static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f) { -#ifdef CONFIG_PPC_RADIX_MMU + if (!IS_ENABLED(CONFIG_PPC_RADIX_MMU)) + return 0; + + cur_cpu_spec->mmu_features |= MMU_FTR_KERNEL_RO; cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; - cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE; cur_cpu_spec->mmu_features |= MMU_FTR_GTSE; cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU; return 1; -#endif - return 0; } static int __init feat_enable_dscr(struct dt_cpu_feature *f) @@ -336,7 +344,7 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f) return 1; } -static void hfscr_pmu_enable(void) +static void __init hfscr_pmu_enable(void) { u64 hfscr = mfspr(SPRN_HFSCR); hfscr |= PPC_BIT(60); @@ -351,7 +359,7 @@ static void init_pmu_power8(void) } mtspr(SPRN_MMCRA, 0); - mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR0, MMCR0_FC); mtspr(SPRN_MMCR1, 0); mtspr(SPRN_MMCR2, 0); mtspr(SPRN_MMCRS, 0); @@ -390,7 +398,7 @@ static void init_pmu_power9(void) mtspr(SPRN_MMCRC, 0); mtspr(SPRN_MMCRA, 0); - mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR0, MMCR0_FC); mtspr(SPRN_MMCR1, 0); mtspr(SPRN_MMCR2, 0); } @@ -426,7 +434,7 @@ static void init_pmu_power10(void) mtspr(SPRN_MMCR3, 0); mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); - mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); + mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT); } static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index 9bdaaf7fddc9..2f9dbf8ad2ee 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -280,7 +280,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) } DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache); -void eeh_cache_debugfs_init(void) +void __init eeh_cache_debugfs_init(void) { debugfs_create_file_unsafe("eeh_address_cache", 0400, arch_debugfs_dir, NULL, diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 350dab18e137..422f80b5b27b 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -905,18 +905,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe) } #endif /* CONFIG_STACKTRACE */ + eeh_for_each_pe(pe, tmp_pe) + eeh_pe_for_each_dev(tmp_pe, edev, tmp) + edev->mode &= ~EEH_DEV_NO_HANDLER; + eeh_pe_update_time_stamp(pe); pe->freeze_count++; if (pe->freeze_count > eeh_max_freezes) { pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n", pe->phb->global_number, pe->addr, pe->freeze_count); - result = PCI_ERS_RESULT_DISCONNECT; - } - eeh_for_each_pe(pe, tmp_pe) - eeh_pe_for_each_dev(tmp_pe, edev, tmp) - edev->mode &= ~EEH_DEV_NO_HANDLER; + goto recover_failed; + } /* Walk the various device drivers attached to this slot through * a reset sequence, giving each an opportunity to do what it needs @@ -928,39 +929,38 @@ void eeh_handle_normal_event(struct eeh_pe *pe) * the error. Override the result if necessary to have partially * hotplug for this case. */ - if (result != PCI_ERS_RESULT_DISCONNECT) { - pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", - pe->freeze_count, eeh_max_freezes); - pr_info("EEH: Notify device drivers to shutdown\n"); - eeh_set_channel_state(pe, pci_channel_io_frozen); - eeh_set_irq_state(pe, false); - eeh_pe_report("error_detected(IO frozen)", pe, - eeh_report_error, &result); - if ((pe->type & EEH_PE_PHB) && - result != PCI_ERS_RESULT_NONE && - result != PCI_ERS_RESULT_NEED_RESET) - result = PCI_ERS_RESULT_NEED_RESET; - } + pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", + pe->freeze_count, eeh_max_freezes); + pr_info("EEH: Notify device drivers to shutdown\n"); + eeh_set_channel_state(pe, pci_channel_io_frozen); + eeh_set_irq_state(pe, false); + eeh_pe_report("error_detected(IO frozen)", pe, + eeh_report_error, &result); + if (result == PCI_ERS_RESULT_DISCONNECT) + goto recover_failed; + + /* + * Error logged on a PHB are always fences which need a full + * PHB reset to clear so force that to happen. + */ + if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE) + result = PCI_ERS_RESULT_NEED_RESET; /* Get the current PCI slot state. This can take a long time, * sometimes over 300 seconds for certain systems. */ - if (result != PCI_ERS_RESULT_DISCONNECT) { - rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); - if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { - pr_warn("EEH: Permanent failure\n"); - result = PCI_ERS_RESULT_DISCONNECT; - } + rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000); + if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { + pr_warn("EEH: Permanent failure\n"); + goto recover_failed; } /* Since rtas may enable MMIO when posting the error log, * don't post the error log until after all dev drivers * have been informed. */ - if (result != PCI_ERS_RESULT_DISCONNECT) { - pr_info("EEH: Collect temporary log\n"); - eeh_slot_error_detail(pe, EEH_LOG_TEMP); - } + pr_info("EEH: Collect temporary log\n"); + eeh_slot_error_detail(pe, EEH_LOG_TEMP); /* If all device drivers were EEH-unaware, then shut * down all of the device drivers, and hope they @@ -970,9 +970,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Reset with hotplug activity\n"); rc = eeh_reset_device(pe, bus, NULL, false); if (rc) { - pr_warn("%s: Unable to reset, err=%d\n", - __func__, rc); - result = PCI_ERS_RESULT_DISCONNECT; + pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); + goto recover_failed; } } @@ -980,10 +979,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe) if (result == PCI_ERS_RESULT_CAN_RECOVER) { pr_info("EEH: Enable I/O for affected devices\n"); rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); + if (rc < 0) + goto recover_failed; - if (rc < 0) { - result = PCI_ERS_RESULT_DISCONNECT; - } else if (rc) { + if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { pr_info("EEH: Notify device drivers to resume I/O\n"); @@ -991,15 +990,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe) eeh_report_mmio_enabled, &result); } } - - /* If all devices reported they can proceed, then re-enable DMA */ if (result == PCI_ERS_RESULT_CAN_RECOVER) { pr_info("EEH: Enabled DMA for affected devices\n"); rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); + if (rc < 0) + goto recover_failed; - if (rc < 0) { - result = PCI_ERS_RESULT_DISCONNECT; - } else if (rc) { + if (rc) { result = PCI_ERS_RESULT_NEED_RESET; } else { /* @@ -1017,16 +1014,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe) pr_info("EEH: Reset without hotplug activity\n"); rc = eeh_reset_device(pe, bus, &rmv_data, true); if (rc) { - pr_warn("%s: Cannot reset, err=%d\n", - __func__, rc); - result = PCI_ERS_RESULT_DISCONNECT; - } else { - result = PCI_ERS_RESULT_NONE; - eeh_set_channel_state(pe, pci_channel_io_normal); - eeh_set_irq_state(pe, true); - eeh_pe_report("slot_reset", pe, eeh_report_reset, - &result); + pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); + goto recover_failed; } + + result = PCI_ERS_RESULT_NONE; + eeh_set_channel_state(pe, pci_channel_io_normal); + eeh_set_irq_state(pe, true); + eeh_pe_report("slot_reset", pe, eeh_report_reset, + &result); } if ((result == PCI_ERS_RESULT_RECOVERED) || @@ -1054,45 +1050,47 @@ void eeh_handle_normal_event(struct eeh_pe *pe) } pr_info("EEH: Recovery successful.\n"); - } else { - /* - * About 90% of all real-life EEH failures in the field - * are due to poorly seated PCI cards. Only 10% or so are - * due to actual, failed cards. - */ - pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" - "Please try reseating or replacing it\n", - pe->phb->global_number, pe->addr); + goto out; + } - eeh_slot_error_detail(pe, EEH_LOG_PERM); +recover_failed: + /* + * About 90% of all real-life EEH failures in the field + * are due to poorly seated PCI cards. Only 10% or so are + * due to actual, failed cards. + */ + pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" + "Please try reseating or replacing it\n", + pe->phb->global_number, pe->addr); - /* Notify all devices that they're about to go down. */ - eeh_set_channel_state(pe, pci_channel_io_perm_failure); - eeh_set_irq_state(pe, false); - eeh_pe_report("error_detected(permanent failure)", pe, - eeh_report_failure, NULL); + eeh_slot_error_detail(pe, EEH_LOG_PERM); - /* Mark the PE to be removed permanently */ - eeh_pe_state_mark(pe, EEH_PE_REMOVED); + /* Notify all devices that they're about to go down. */ + eeh_set_channel_state(pe, pci_channel_io_perm_failure); + eeh_set_irq_state(pe, false); + eeh_pe_report("error_detected(permanent failure)", pe, + eeh_report_failure, NULL); - /* - * Shut down the device drivers for good. We mark - * all removed devices correctly to avoid access - * the their PCI config any more. - */ - if (pe->type & EEH_PE_VF) { - eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); - } else { - eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); - eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + /* Mark the PE to be removed permanently */ + eeh_pe_state_mark(pe, EEH_PE_REMOVED); - pci_lock_rescan_remove(); - pci_hp_remove_devices(bus); - pci_unlock_rescan_remove(); - /* The passed PE should no longer be used */ - return; - } + /* + * Shut down the device drivers for good. We mark + * all removed devices correctly to avoid access + * the their PCI config any more. + */ + if (pe->type & EEH_PE_VF) { + eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + } else { + eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); + eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + + pci_lock_rescan_remove(); + pci_hp_remove_devices(bus); + pci_unlock_rescan_remove(); + /* The passed PE should no longer be used */ + return; } out: diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 61fdd53cdd9a..7748c278d13c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -73,13 +73,39 @@ prepare_transfer_to_handler: _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler) #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ +#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) + .globl __kuep_lock +__kuep_lock: + lwz r9, THREAD+THSR0(r2) + update_user_segments_by_4 r9, r10, r11, r12 + blr + +__kuep_unlock: + lwz r9, THREAD+THSR0(r2) + rlwinm r9,r9,0,~SR_NX + update_user_segments_by_4 r9, r10, r11, r12 + blr + +.macro kuep_lock + bl __kuep_lock +.endm +.macro kuep_unlock + bl __kuep_unlock +.endm +#else +.macro kuep_lock +.endm +.macro kuep_unlock +.endm +#endif + .globl transfer_to_syscall transfer_to_syscall: stw r11, GPR1(r1) stw r11, 0(r1) mflr r12 stw r12, _LINK(r1) -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ #endif lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ @@ -90,10 +116,10 @@ transfer_to_syscall: stw r12,8(r1) stw r2,_TRAP(r1) SAVE_GPR(0, r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) addi r2,r10,-THREAD SAVE_NVGPRS(r1) + kuep_lock /* Calling convention has r9 = orig r0, r10 = regs */ addi r10,r1,STACK_FRAME_OVERHEAD @@ -110,6 +136,7 @@ ret_from_syscall: cmplwi cr0,r5,0 bne- 2f #endif /* CONFIG_PPC_47x */ + kuep_unlock lwz r4,_LINK(r1) lwz r5,_CCR(r1) mtlr r4 @@ -139,7 +166,7 @@ syscall_exit_finish: mtxer r5 lwz r0,GPR0(r1) lwz r3,GPR3(r1) - REST_8GPRS(4,r1) + REST_GPRS(4, 11, r1) lwz r12,GPR12(r1) b 1b @@ -232,9 +259,9 @@ fast_exception_return: beq 3f /* if not, we've got problems */ #endif -2: REST_4GPRS(3, r11) +2: REST_GPRS(3, 6, r11) lwz r10,_CCR(r11) - REST_2GPRS(1, r11) + REST_GPRS(1, 2, r11) mtcr r10 lwz r10,_LINK(r11) mtlr r10 @@ -273,6 +300,7 @@ interrupt_return: beq .Lkernel_interrupt_return bl interrupt_exit_user_prepare cmpwi r3,0 + kuep_unlock bne- .Lrestore_nvgprs .Lfast_user_interrupt_return: @@ -298,16 +326,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) * the reliable stack unwinder later on. Clear it. */ stw r0,8(r1) - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) + REST_GPRS(7, 12, r1) mtcr r3 mtlr r4 mtctr r5 mtspr SPRN_XER,r6 - REST_4GPRS(2, r1) - REST_GPR(6, r1) + REST_GPRS(2, 6, r1) REST_GPR(0, r1) REST_GPR(1, r1) rfi @@ -341,8 +367,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) lwz r6,_CCR(r1) li r0,0 - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) + REST_GPRS(7, 12, r1) mtlr r3 mtctr r4 @@ -354,7 +379,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) */ stw r0,8(r1) - REST_4GPRS(2, r1) + REST_GPRS(2, 5, r1) bne- cr1,1f /* emulate stack store */ mtcr r6 @@ -430,8 +455,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) bne interrupt_return; \ lwz r0,GPR0(r1); \ lwz r2,GPR2(r1); \ - REST_4GPRS(3, r1); \ - REST_2GPRS(7, r1); \ + REST_GPRS(3, 8, r1); \ lwz r10,_XER(r1); \ lwz r11,_CTR(r1); \ mtspr SPRN_XER,r10; \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 70cff7b49e17..9581906b5ee9 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -180,7 +180,7 @@ _GLOBAL(_switch) #endif ld r8,KSP(r4) /* new stack pointer */ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION b 2f END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) @@ -232,7 +232,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) slbmte r7,r0 isync 2: -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_64S_HASH_MMU */ clrrdi r7, r8, THREAD_SHIFT /* base of new stack */ /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 93b0f3ec8fb0..d4b8aff20815 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -37,7 +37,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node, return -1; for (i = 0; i < (len / 4); i++) { - struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i])); + ppc_inst_t inst = ppc_inst(be32_to_cpu(insts[i])); patch_instruction(epapr_hypercall_start + i, inst); #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64) patch_instruction(epapr_ev_idle_start + i, inst); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 711c66b76df1..67dc4e3179a0 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -198,8 +198,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) stdcx. r0,0,r1 /* to clear the reservation */ - REST_4GPRS(2, r1) - REST_4GPRS(6, r1) + REST_GPRS(2, 9, r1) ld r10,_CTR(r1) ld r11,_XER(r1) @@ -375,9 +374,7 @@ ret_from_mc_except: exc_##n##_common: \ std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ - std r9,GPR9(r1); /* save r9 in stackframe */ \ + SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ std r10,_NIP(r1); /* save SRR0 to stackframe */ \ std r11,_MSR(r1); /* save SRR1 to stackframe */ \ beq 2f; /* if from kernel mode */ \ @@ -1061,9 +1058,7 @@ bad_stack_book3e: std r11,_ESR(r1) std r0,GPR0(r1); /* save r0 in stackframe */ \ std r2,GPR2(r1); /* save r2 in stackframe */ \ - SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ - SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ - std r9,GPR9(r1); /* save r9 in stackframe */ \ + SAVE_GPRS(3, 9, r1); /* save r3 - r9 in stackframe */ \ ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \ ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \ mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \ @@ -1077,8 +1072,7 @@ bad_stack_book3e: std r10,_LINK(r1) std r11,_CTR(r1) std r12,_XER(r1) - SAVE_10GPRS(14,r1) - SAVE_8GPRS(24,r1) + SAVE_GPRS(14, 31, r1) lhz r12,PACA_TRAP_SAVE(r13) std r12,_TRAP(r1) addi r11,r1,INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eaf1f72131a1..55caeee37c08 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -48,7 +48,7 @@ .balign IFETCH_ALIGN_BYTES; \ .global name; \ _ASM_NOKPROBE_SYMBOL(name); \ - DEFINE_FIXED_SYMBOL(name); \ + DEFINE_FIXED_SYMBOL(name, text); \ name: #define TRAMP_REAL_BEGIN(name) \ @@ -76,31 +76,18 @@ name: ld reg,PACAKBASE(r13); /* get high part of &label */ \ ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label) -#define __LOAD_HANDLER(reg, label) \ +#define __LOAD_HANDLER(reg, label, section) \ ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(label))@l + ori reg,reg,(ABS_ADDR(label, section))@l /* * Branches from unrelocated code (e.g., interrupts) to labels outside * head-y require >64K offsets. */ -#define __LOAD_FAR_HANDLER(reg, label) \ +#define __LOAD_FAR_HANDLER(reg, label, section) \ ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(label))@l; \ - addis reg,reg,(ABS_ADDR(label))@h - -/* - * Branch to label using its 0xC000 address. This results in instruction - * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned - * on using mtmsr rather than rfid. - * - * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than - * load KBASE for a slight optimisation. - */ -#define BRANCH_TO_C000(reg, label) \ - __LOAD_FAR_HANDLER(reg, label); \ - mtctr reg; \ - bctr + ori reg,reg,(ABS_ADDR(label, section))@l; \ + addis reg,reg,(ABS_ADDR(label, section))@h /* * Interrupt code generation macros @@ -111,9 +98,10 @@ name: #define IAREA .L_IAREA_\name\() /* PACA save area */ #define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */ #define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */ +#define ICFAR .L_ICFAR_\name\() /* Uses CFAR */ +#define ICFAR_IF_HVMODE .L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV */ #define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */ #define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */ -#define ISET_RI .L_ISET_RI_\name\() /* Run common code w/ MSR[RI]=1 */ #define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */ #define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */ #define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */ @@ -151,15 +139,18 @@ do_define_int n .ifndef IISIDE IISIDE=0 .endif + .ifndef ICFAR + ICFAR=1 + .endif + .ifndef ICFAR_IF_HVMODE + ICFAR_IF_HVMODE=0 + .endif .ifndef IDAR IDAR=0 .endif .ifndef IDSISR IDSISR=0 .endif - .ifndef ISET_RI - ISET_RI=1 - .endif .ifndef IBRANCH_TO_COMMON IBRANCH_TO_COMMON=1 .endif @@ -291,9 +282,21 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) HMT_MEDIUM std r10,IAREA+EX_R10(r13) /* save r10 - r12 */ + .if ICFAR BEGIN_FTR_SECTION mfspr r10,SPRN_CFAR END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + .elseif ICFAR_IF_HVMODE +BEGIN_FTR_SECTION + BEGIN_FTR_SECTION_NESTED(69) + mfspr r10,SPRN_CFAR + END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69) +FTR_SECTION_ELSE + BEGIN_FTR_SECTION_NESTED(69) + li r10,0 + END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69) +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + .endif .if \ool .if !\virt b tramp_real_\name @@ -309,9 +312,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) BEGIN_FTR_SECTION std r9,IAREA+EX_PPR(r13) END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) + .if ICFAR || ICFAR_IF_HVMODE BEGIN_FTR_SECTION std r10,IAREA+EX_CFAR(r13) END_FTR_SECTION_IFSET(CPU_FTR_CFAR) + .endif INTERRUPT_TO_KERNEL mfctr r10 std r10,IAREA+EX_CTR(r13) @@ -376,7 +381,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) * This switches to virtual mode and sets MSR[RI]. */ .macro __GEN_COMMON_ENTRY name -DEFINE_FIXED_SYMBOL(\name\()_common_real) +DEFINE_FIXED_SYMBOL(\name\()_common_real, text) \name\()_common_real: .if IKVM_REAL KVMTEST \name kvm_interrupt @@ -399,7 +404,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) .endif .balign IFETCH_ALIGN_BYTES -DEFINE_FIXED_SYMBOL(\name\()_common_virt) +DEFINE_FIXED_SYMBOL(\name\()_common_virt, text) \name\()_common_virt: .if IKVM_VIRT KVMTEST \name kvm_interrupt @@ -413,7 +418,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt) * want to run in real mode. */ .macro __GEN_REALMODE_COMMON_ENTRY name -DEFINE_FIXED_SYMBOL(\name\()_common_real) +DEFINE_FIXED_SYMBOL(\name\()_common_real, text) \name\()_common_real: .if IKVM_REAL KVMTEST \name kvm_interrupt @@ -512,11 +517,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) stb r10,PACASRR_VALID(r13) .endif - .if ISET_RI - li r10,MSR_RI - mtmsrd r10,1 /* Set MSR_RI */ - .endif - .if ISTACK .if IKUAP kuap_save_amr_and_lock r9, r10, cr1, cr0 @@ -568,14 +568,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) .endif BEGIN_FTR_SECTION + .if ICFAR || ICFAR_IF_HVMODE ld r10,IAREA+EX_CFAR(r13) + .else + li r10,0 + .endif std r10,ORIG_GPR3(r1) END_FTR_SECTION_IFSET(CPU_FTR_CFAR) ld r10,IAREA+EX_CTR(r13) std r10,_CTR(r1) std r2,GPR2(r1) /* save r2 in stackframe */ - SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */ - SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */ + SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe */ mflr r9 /* Get LR, later save to stack */ ld r2,PACATOC(r13) /* get kernel TOC into r2 */ std r9,_LINK(r1) @@ -693,8 +696,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) mtlr r9 ld r9,_CCR(r1) mtcr r9 - REST_8GPRS(2, r1) - REST_4GPRS(10, r1) + REST_GPRS(2, 13, r1) REST_GPR(0, r1) /* restore original r1. */ ld r1,GPR1(r1) @@ -850,12 +852,12 @@ SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000) #ifdef CONFIG_RELOCATABLE TRAMP_VIRT_BEGIN(system_call_vectored_tramp) - __LOAD_HANDLER(r10, system_call_vectored_common) + __LOAD_HANDLER(r10, system_call_vectored_common, virt_trampolines) mtctr r10 bctr TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp) - __LOAD_HANDLER(r10, system_call_vectored_sigill) + __LOAD_HANDLER(r10, system_call_vectored_sigill, virt_trampolines) mtctr r10 bctr #endif @@ -902,11 +904,6 @@ INT_DEFINE_BEGIN(system_reset) IVEC=0x100 IAREA=PACA_EXNMI IVIRT=0 /* no virt entry point */ - /* - * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is - * being used, so a nested NMI exception would corrupt it. - */ - ISET_RI=0 ISTACK=0 IKVM_REAL=1 INT_DEFINE_END(system_reset) @@ -964,7 +961,9 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake) /* We are waking up from idle, so may clobber any volatile register */ cmpwi cr1,r5,2 bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */ - BRANCH_TO_C000(r12, DOTSYM(idle_return_gpr_loss)) + __LOAD_FAR_HANDLER(r12, DOTSYM(idle_return_gpr_loss), real_trampolines) + mtctr r12 + bctr #endif #ifdef CONFIG_PPC_PSERIES @@ -979,16 +978,14 @@ TRAMP_REAL_BEGIN(system_reset_fwnmi) EXC_COMMON_BEGIN(system_reset_common) __GEN_COMMON_ENTRY system_reset /* - * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able - * to recover, but nested NMI will notice in_nmi and not recover - * because of the use of the NMI stack. in_nmi reentrancy is tested in - * system_reset_exception. + * Increment paca->in_nmi. When the interrupt entry wrapper later + * enable MSR_RI, then SLB or MCE will be able to recover, but a nested + * NMI will notice in_nmi and not recover because of the use of the NMI + * stack. in_nmi reentrancy is tested in system_reset_exception. */ lhz r10,PACA_IN_NMI(r13) addi r10,r10,1 sth r10,PACA_IN_NMI(r13) - li r10,MSR_RI - mtmsrd r10,1 mr r10,r1 ld r1,PACA_NMI_EMERG_SP(r13) @@ -1062,12 +1059,6 @@ INT_DEFINE_BEGIN(machine_check_early) IAREA=PACA_EXMC IVIRT=0 /* no virt entry point */ IREALMODE_COMMON=1 - /* - * MSR_RI is not enabled, because PACA_EXMC is being used, so a - * nested machine check corrupts it. machine_check_common enables - * MSR_RI. - */ - ISET_RI=0 ISTACK=0 IDAR=1 IDSISR=1 @@ -1078,7 +1069,6 @@ INT_DEFINE_BEGIN(machine_check) IVEC=0x200 IAREA=PACA_EXMC IVIRT=0 /* no virt entry point */ - ISET_RI=0 IDAR=1 IDSISR=1 IKVM_REAL=1 @@ -1148,9 +1138,6 @@ EXC_COMMON_BEGIN(machine_check_early_common) BEGIN_FTR_SECTION bl enable_machine_check END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - li r10,MSR_RI - mtmsrd r10,1 - addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_early std r3,RESULT(r1) /* Save result */ @@ -1238,10 +1225,6 @@ EXC_COMMON_BEGIN(machine_check_common) * save area: PACA_EXMC instead of PACA_EXGEN. */ GEN_COMMON machine_check - - /* Enable MSR_RI when finished with PACA_EXMC */ - li r10,MSR_RI - mtmsrd r10,1 addi r3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception_async b interrupt_return_srr @@ -1369,11 +1352,15 @@ EXC_COMMON_BEGIN(data_access_common) addi r3,r1,STACK_FRAME_OVERHEAD andis. r0,r4,DSISR_DABRMATCH@h bne- 1f +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION bl do_hash_fault MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else + bl do_page_fault +#endif b interrupt_return_srr 1: bl do_break @@ -1416,6 +1403,7 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) EXC_VIRT_END(data_access_slb, 0x4380, 0x80) EXC_COMMON_BEGIN(data_access_slb_common) GEN_COMMON data_access_slb +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -1428,9 +1416,12 @@ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else + li r3,-EFAULT +#endif std r3,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD - bl do_bad_slb_fault + bl do_bad_segment_interrupt b interrupt_return_srr @@ -1462,11 +1453,15 @@ EXC_VIRT_END(instruction_access, 0x4400, 0x80) EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION bl do_hash_fault MMU_FTR_SECTION_ELSE bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else + bl do_page_fault +#endif b interrupt_return_srr @@ -1496,6 +1491,7 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) EXC_COMMON_BEGIN(instruction_access_slb_common) GEN_COMMON instruction_access_slb +#ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ addi r3,r1,STACK_FRAME_OVERHEAD @@ -1508,9 +1504,12 @@ MMU_FTR_SECTION_ELSE /* Radix case, access is outside page table range */ li r3,-EFAULT ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else + li r3,-EFAULT +#endif std r3,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD - bl do_bad_slb_fault + bl do_bad_segment_interrupt b interrupt_return_srr @@ -1536,6 +1535,12 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * * If soft masked, the masked handler will note the pending interrupt for * replay, and clear MSR[EE] in the interrupted context. + * + * CFAR is not required because this is an asynchronous interrupt that in + * general won't have much bearing on the state of the CPU, with the possible + * exception of crash/debug IPIs, but those are generally moving to use SRESET + * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case + * it may be exiting the guest and need CFAR to be saved. */ INT_DEFINE_BEGIN(hardware_interrupt) IVEC=0x500 @@ -1543,6 +1548,10 @@ INT_DEFINE_BEGIN(hardware_interrupt) IMASK=IRQS_DISABLED IKVM_REAL=1 IKVM_VIRT=1 + ICFAR=0 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ICFAR_IF_HVMODE=1 +#endif INT_DEFINE_END(hardware_interrupt) EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) @@ -1764,6 +1773,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) * If PPC_WATCHDOG is configured, the soft masked handler will actually set * things back up to run soft_nmi_interrupt as a regular interrupt handler * on the emergency stack. + * + * CFAR is not required because this is asynchronous (see hardware_interrupt). + * A watchdog interrupt may like to have CFAR, but usually the interesting + * branch is long gone by that point (e.g., infinite loop). */ INT_DEFINE_BEGIN(decrementer) IVEC=0x900 @@ -1771,6 +1784,7 @@ INT_DEFINE_BEGIN(decrementer) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + ICFAR=0 INT_DEFINE_END(decrementer) EXC_REAL_BEGIN(decrementer, 0x900, 0x80) @@ -1846,6 +1860,8 @@ EXC_COMMON_BEGIN(hdecrementer_common) * If soft masked, the masked handler will note the pending interrupt for * replay, leaving MSR[EE] enabled in the interrupted context because the * doorbells are edge triggered. + * + * CFAR is not required, similarly to hardware_interrupt. */ INT_DEFINE_BEGIN(doorbell_super) IVEC=0xa00 @@ -1853,6 +1869,7 @@ INT_DEFINE_BEGIN(doorbell_super) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + ICFAR=0 INT_DEFINE_END(doorbell_super) EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100) @@ -1904,6 +1921,7 @@ INT_DEFINE_BEGIN(system_call) IVEC=0xc00 IKVM_REAL=1 IKVM_VIRT=1 + ICFAR=0 INT_DEFINE_END(system_call) .macro SYSTEM_CALL virt @@ -1942,12 +1960,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) HMT_MEDIUM .if ! \virt - __LOAD_HANDLER(r10, system_call_common_real) + __LOAD_HANDLER(r10, system_call_common_real, real_vectors) mtctr r10 bctr .else #ifdef CONFIG_RELOCATABLE - __LOAD_HANDLER(r10, system_call_common) + __LOAD_HANDLER(r10, system_call_common, virt_vectors) mtctr r10 bctr #else @@ -2001,7 +2019,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives * outside the head section. */ - __LOAD_FAR_HANDLER(r10, kvmppc_hcall) + __LOAD_FAR_HANDLER(r10, kvmppc_hcall, real_trampolines) mtctr r10 bctr #else @@ -2202,6 +2220,11 @@ EXC_COMMON_BEGIN(hmi_exception_common) * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt. * This is an asynchronous interrupt in response to a msgsnd doorbell. * Similar to the 0xa00 doorbell but for host rather than guest. + * + * CFAR is not required (similar to doorbell_interrupt), unless KVM HV + * is enabled, in which case it may be a guest exit. Most PowerNV kernels + * include KVM support so it would be nice if this could be dynamically + * patched out if KVM was not currently running any guests. */ INT_DEFINE_BEGIN(h_doorbell) IVEC=0xe80 @@ -2209,6 +2232,9 @@ INT_DEFINE_BEGIN(h_doorbell) IMASK=IRQS_DISABLED IKVM_REAL=1 IKVM_VIRT=1 +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ICFAR=0 +#endif INT_DEFINE_END(h_doorbell) EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20) @@ -2232,6 +2258,9 @@ EXC_COMMON_BEGIN(h_doorbell_common) * Interrupt 0xea0 - Hypervisor Virtualization Interrupt. * This is an asynchronous interrupt in response to an "external exception". * Similar to 0x500 but for host only. + * + * Like h_doorbell, CFAR is only required for KVM HV because this can be + * a guest exit. */ INT_DEFINE_BEGIN(h_virt_irq) IVEC=0xea0 @@ -2239,6 +2268,9 @@ INT_DEFINE_BEGIN(h_virt_irq) IMASK=IRQS_DISABLED IKVM_REAL=1 IKVM_VIRT=1 +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE + ICFAR=0 +#endif INT_DEFINE_END(h_virt_irq) EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20) @@ -2275,6 +2307,8 @@ EXC_VIRT_NONE(0x4ee0, 0x20) * * If soft masked, the masked handler will note the pending interrupt for * replay, and clear MSR[EE] in the interrupted context. + * + * CFAR is not used by perf interrupts so not required. */ INT_DEFINE_BEGIN(performance_monitor) IVEC=0xf00 @@ -2282,6 +2316,7 @@ INT_DEFINE_BEGIN(performance_monitor) #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_REAL=1 #endif + ICFAR=0 INT_DEFINE_END(performance_monitor) EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20) @@ -2706,6 +2741,7 @@ EXC_VIRT_NONE(0x5800, 0x100) INT_DEFINE_BEGIN(soft_nmi) IVEC=0x900 ISTACK=0 + ICFAR=0 INT_DEFINE_END(soft_nmi) /* @@ -3025,7 +3061,7 @@ USE_FIXED_SECTION(virt_trampolines) .align 7 .globl __end_interrupts __end_interrupts: -DEFINE_FIXED_SYMBOL(__end_interrupts) +DEFINE_FIXED_SYMBOL(__end_interrupts, virt_trampolines) CLOSE_FIXED_SECTION(real_vectors); CLOSE_FIXED_SECTION(real_trampolines); diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index b7ceb041743c..d03e488cfe9c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -251,7 +251,7 @@ bool is_fadump_reserved_mem_contiguous(void) } /* Print firmware assisted dump configurations for debugging purpose. */ -static void fadump_show_config(void) +static void __init fadump_show_config(void) { int i; @@ -353,7 +353,7 @@ static __init u64 fadump_calculate_reserve_size(void) * Calculate the total memory size required to be reserved for * firmware-assisted dump registration. */ -static unsigned long get_fadump_area_size(void) +static unsigned long __init get_fadump_area_size(void) { unsigned long size = 0; @@ -462,7 +462,7 @@ static int __init fadump_get_boot_mem_regions(void) * with the given memory range. * False, otherwise. */ -static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx) +static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx) { bool ret = false; int i; @@ -737,7 +737,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) fw_dump.ops->fadump_trigger(fdh, str); } -u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) +u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) { struct elf_prstatus prstatus; @@ -752,7 +752,7 @@ u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) return buf; } -void fadump_update_elfcore_header(char *bufp) +void __init fadump_update_elfcore_header(char *bufp) { struct elf_phdr *phdr; @@ -770,7 +770,7 @@ void fadump_update_elfcore_header(char *bufp) return; } -static void *fadump_alloc_buffer(unsigned long size) +static void *__init fadump_alloc_buffer(unsigned long size) { unsigned long count, i; struct page *page; @@ -792,7 +792,7 @@ static void fadump_free_buffer(unsigned long vaddr, unsigned long size) free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL); } -s32 fadump_setup_cpu_notes_buf(u32 num_cpus) +s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus) { /* Allocate buffer to hold cpu crash notes. */ fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); @@ -1447,7 +1447,7 @@ static ssize_t release_mem_store(struct kobject *kobj, } /* Release the reserved memory and disable the FADump */ -static void unregister_fadump(void) +static void __init unregister_fadump(void) { fadump_cleanup(); fadump_release_memory(fw_dump.reserve_dump_area_start, @@ -1547,7 +1547,7 @@ ATTRIBUTE_GROUPS(fadump); DEFINE_SHOW_ATTRIBUTE(fadump_region); -static void fadump_init_files(void) +static void __init fadump_init_files(void) { int rc = 0; @@ -1641,6 +1641,14 @@ int __init setup_fadump(void) else if (fw_dump.reserve_dump_area_size) fw_dump.ops->fadump_init_mem_struct(&fw_dump); + /* + * In case of panic, fadump is triggered via ppc_panic_event() + * panic notifier. Setting crash_kexec_post_notifiers to 'true' + * lets panic() function take crash friendly path before panic + * notifiers are invoked. + */ + crash_kexec_post_notifiers = true; + return 1; } subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index ba4afe3b5a9c..f71f2bbd4de6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -81,7 +81,12 @@ EXPORT_SYMBOL(store_fp_state) */ _GLOBAL(load_up_fpu) mfmsr r5 +#ifdef CONFIG_PPC_BOOK3S_64 + /* interrupt doesn't set MSR[RI] and HPT can fault on current access */ + ori r5,r5,MSR_FP|MSR_RI +#else ori r5,r5,MSR_FP +#endif #ifdef CONFIG_VSX BEGIN_FTR_SECTION oris r5,r5,MSR_VSX@h diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 6b1ec9e3541b..c3286260a7d1 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -115,8 +115,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) stw r10,8(r1) li r10, \trapno stw r10,_TRAP(r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) SAVE_NVGPRS(r1) stw r2,GPR2(r1) stw r12,_NIP(r1) @@ -136,6 +135,12 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) andi. r12,r9,MSR_PR bne 777f bl prepare_transfer_to_handler +#ifdef CONFIG_PPC_KUEP + b 778f +777: + bl __kuep_lock +778: +#endif 777: #endif .endm @@ -202,11 +207,11 @@ vmap_stack_overflow: mfspr r1, SPRN_SPRG_THREAD lwz r1, TASK_CPU - THREAD(r1) slwi r1, r1, 3 - addis r1, r1, emergency_ctx@ha + addis r1, r1, emergency_ctx-PAGE_OFFSET@ha #else - lis r1, emergency_ctx@ha + lis r1, emergency_ctx-PAGE_OFFSET@ha #endif - lwz r1, emergency_ctx@l(r1) + lwz r1, emergency_ctx-PAGE_OFFSET@l(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 0 vmap_stack_overflow prepare_transfer_to_handler diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 7d72ee5ab387..b6c6d1de5fd5 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/pgtable.h> +#include <linux/sizes.h> #include <asm/processor.h> #include <asm/page.h> #include <asm/mmu.h> @@ -297,6 +298,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) 3: mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP + rlwinm. r9, r9, 0, 0xff + beq 5f /* Kuap fault */ +#endif 4: tophys(r11, r11) rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ @@ -377,6 +382,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt) 3: mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP + rlwinm. r9, r9, 0, 0xff + beq 5f /* Kuap fault */ +#endif 4: tophys(r11, r11) rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ @@ -650,7 +659,7 @@ start_here: b . /* prevent prefetch past rfi */ /* Set up the initial MMU state so we can do the first level of - * kernel initialization. This maps the first 16 MBytes of memory 1:1 + * kernel initialization. This maps the first 32 MBytes of memory 1:1 * virtual to physical and more importantly sets the cache mode. */ initial_mmu: @@ -687,6 +696,12 @@ initial_mmu: tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ + li r0,62 /* TLB slot 62 */ + addis r4,r4,SZ_16M@h + addis r3,r3,SZ_16M@h + tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ + tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ + isync /* Establish the exception vector base diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 02d2928d1e01..b73a56466903 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -334,6 +334,10 @@ interrupt_base: mfspr r12,SPRN_MMUCR mfspr r13,SPRN_PID /* Get PID */ rlwimi r12,r13,0,24,31 /* Set TID */ +#ifdef CONFIG_PPC_KUAP + cmpwi r13,0 + beq 2f /* KUAP Fault */ +#endif 4: mtspr SPRN_MMUCR,r12 @@ -444,6 +448,10 @@ interrupt_base: mfspr r12,SPRN_MMUCR mfspr r13,SPRN_PID /* Get PID */ rlwimi r12,r13,0,24,31 /* Set TID */ +#ifdef CONFIG_PPC_KUAP + cmpwi r13,0 + beq 2f /* KUAP Fault */ +#endif 4: mtspr SPRN_MMUCR,r12 @@ -532,10 +540,7 @@ finish_tlb_load_44x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ -#ifdef CONFIG_PPC_KUEP -0: rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ - patch_site 0b, patch__tlb_44x_kuep -#endif + rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */ 1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ /* Done...restore registers and get out of here. @@ -575,6 +580,10 @@ finish_tlb_load_44x: 3: mfspr r11,SPRN_SPRG3 lwz r11,PGDIR(r11) mfspr r12,SPRN_PID /* Get PID */ +#ifdef CONFIG_PPC_KUAP + cmpwi r12,0 + beq 2f /* KUAP Fault */ +#endif 4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ /* Mask of required permission bits. Note that while we @@ -672,6 +681,10 @@ finish_tlb_load_44x: 3: mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) mfspr r12,SPRN_PID /* Get PID */ +#ifdef CONFIG_PPC_KUAP + cmpwi r12,0 + beq 2f /* KUAP Fault */ +#endif 4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */ /* Make up the required permissions */ @@ -747,10 +760,7 @@ finish_tlb_load_47x: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ -#ifdef CONFIG_PPC_KUEP -0: rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ - patch_site 0b, patch__tlb_47x_kuep -#endif + rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */ 1: tlbwe r11,r13,2 /* Done...restore registers and get out of here. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index f17ae2083733..5c5181e8d5f1 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -126,7 +126,7 @@ __secondary_hold_acknowledge: . = 0x5c .globl __run_at_load __run_at_load: -DEFINE_FIXED_SYMBOL(__run_at_load) +DEFINE_FIXED_SYMBOL(__run_at_load, first_256B) .long RUN_AT_LOAD_DEFAULT #endif @@ -156,7 +156,7 @@ __secondary_hold: /* Tell the master cpu we're here */ /* Relocation is off & we are located at an address less */ /* than 0x100, so only need to grab low order offset. */ - std r24,(ABS_ADDR(__secondary_hold_acknowledge))(0) + std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0) sync li r26,0 @@ -164,7 +164,7 @@ __secondary_hold: tovirt(r26,r26) #endif /* All secondary cpus wait here until told to start. */ -100: ld r12,(ABS_ADDR(__secondary_hold_spinloop))(r26) +100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(r26) cmpdi 0,r12,0 beq 100b @@ -649,15 +649,15 @@ __after_prom_start: 3: #endif /* # bytes of memory to copy */ - lis r5,(ABS_ADDR(copy_to_here))@ha - addi r5,r5,(ABS_ADDR(copy_to_here))@l + lis r5,(ABS_ADDR(copy_to_here, text))@ha + addi r5,r5,(ABS_ADDR(copy_to_here, text))@l bl copy_and_flush /* copy the first n bytes */ /* this includes the code being */ /* executed here. */ /* Jump to the copy of this code that we just made */ - addis r8,r3,(ABS_ADDR(4f))@ha - addi r12,r8,(ABS_ADDR(4f))@l + addis r8,r3,(ABS_ADDR(4f, text))@ha + addi r12,r8,(ABS_ADDR(4f, text))@l mtctr r12 bctr @@ -669,8 +669,8 @@ p_end: .8byte _end - copy_to_here * Now copy the rest of the kernel up to _end, add * _end - copy_to_here to the copy limit and run again. */ - addis r8,r26,(ABS_ADDR(p_end))@ha - ld r8,(ABS_ADDR(p_end))@l(r8) + addis r8,r26,(ABS_ADDR(p_end, text))@ha + ld r8,(ABS_ADDR(p_end, text))@l(r8) add r5,r5,r8 5: bl copy_and_flush /* copy the rest */ @@ -904,7 +904,7 @@ _GLOBAL(relative_toc) blr .balign 8 -p_toc: .8byte __toc_start + 0x8000 - 0b +p_toc: .8byte .TOC. - 0b /* * This is where the main kernel code starts. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 2d596881b70e..0d073b9fd52c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -733,6 +733,7 @@ _GLOBAL(mmu_pin_tlb) #ifdef CONFIG_PIN_TLB_DATA LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) + li r8, 0 #ifdef CONFIG_PIN_TLB_IMMR li r0, 3 #else @@ -741,26 +742,26 @@ _GLOBAL(mmu_pin_tlb) mtctr r0 cmpwi r4, 0 beq 4f - LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) LOAD_REG_ADDR(r9, _sinittext) 2: ori r0, r6, MD_EVALID + ori r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT mtspr SPRN_MD_CTR, r5 mtspr SPRN_MD_EPN, r0 mtspr SPRN_MD_TWC, r7 - mtspr SPRN_MD_RPN, r8 + mtspr SPRN_MD_RPN, r12 addi r5, r5, 0x100 addis r6, r6, SZ_8M@h addis r8, r8, SZ_8M@h cmplw r6, r9 bdnzt lt, 2b - -4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) +4: 2: ori r0, r6, MD_EVALID + ori r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT mtspr SPRN_MD_CTR, r5 mtspr SPRN_MD_EPN, r0 mtspr SPRN_MD_TWC, r7 - mtspr SPRN_MD_RPN, r8 + mtspr SPRN_MD_RPN, r12 addi r5, r5, 0x100 addis r6, r6, SZ_8M@h addis r8, r8, SZ_8M@h @@ -781,7 +782,7 @@ _GLOBAL(mmu_pin_tlb) #endif #if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA) lis r0, (MD_RSV4I | MD_TWAM)@h - mtspr SPRN_MI_CTR, r0 + mtspr SPRN_MD_CTR, r0 #endif mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 68e5c0a7e99d..b876ef8c70a7 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -421,14 +421,14 @@ InstructionTLBMiss: */ /* Get PTE (linux-style) and check access */ mfspr r3,SPRN_IMISS -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 -#ifdef CONFIG_MODULES +#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC @@ -931,7 +931,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS) _GLOBAL(load_segment_registers) li r0, NUM_USER_SEGMENTS /* load up user segment register values */ mtctr r0 /* for context 0 */ +#ifdef CONFIG_PPC_KUEP + lis r3, SR_NX@h /* Kp = 0, Ks = 0, VSID = 0 */ +#else li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */ +#endif li r4, 0 3: mtsrin r3, r4 addi r3, r3, 0x111 /* increment VSID */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index ef8d1b1c234e..bb6d5d0fc4ac 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -87,8 +87,7 @@ END_BTB_FLUSH_SECTION stw r10, 8(r1) li r10, \trapno stw r10,_TRAP(r1) - SAVE_4GPRS(3, r1) - SAVE_2GPRS(7, r1) + SAVE_GPRS(3, 8, r1) SAVE_NVGPRS(r1) stw r2,GPR2(r1) stw r12,_NIP(r1) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 0a9a0f301474..ac2b4dcf5fd3 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -462,6 +462,12 @@ END_BTB_FLUSH_SECTION mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP + mfspr r12, SPRN_MAS1 + rlwinm. r12,r12,0,0x3fff0000 + beq 2f /* KUAP fault */ +#endif + 4: /* Mask of required permission bits. Note that while we * do copy ESR:ST to _PAGE_RW position as trying to write @@ -571,6 +577,12 @@ END_BTB_FLUSH_SECTION mfspr r11,SPRN_SPRG_THREAD lwz r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP + mfspr r12, SPRN_MAS1 + rlwinm. r12,r12,0,0x3fff0000 + beq 2f /* KUAP fault */ +#endif + /* Make up the required permissions for user code */ #ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_UX @@ -777,6 +789,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ slwi r10, r12, 1 or r10, r10, r12 + rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */ iseleq r12, r12, r10 rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ mtspr SPRN_MAS3, r13 diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 91a3be14808b..2669f80b3a49 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -523,7 +523,7 @@ static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, struct arch_hw_breakpoint **info, int *hit, - struct ppc_inst instr) + ppc_inst_t instr) { int i; int stepped; @@ -616,7 +616,7 @@ int hw_breakpoint_handler(struct die_args *args) int hit[HBP_NUM_MAX] = {0}; int nr_hit = 0; bool ptrace_bp = false; - struct ppc_inst instr = ppc_inst(0); + ppc_inst_t instr = ppc_inst(0); int type = 0; int size = 0; unsigned long ea; diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index 42b967e3d85c..a74623025f3a 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -80,7 +80,7 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type, * Return true if the event is valid wrt dawr configuration, * including extraneous exception. Otherwise return false. */ -bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, +bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr, unsigned long ea, int type, int size, struct arch_hw_breakpoint *info) { @@ -127,7 +127,7 @@ bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, return false; } -void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, +void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr, int *type, int *size, unsigned long *ea) { struct instruction_op op; diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 1f835539fda4..4ad79eb638c6 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -82,7 +82,7 @@ void power4_idle(void) return; if (cpu_has_feature(CPU_FTR_ALTIVEC)) - asm volatile("DSSALL ; sync" ::: "memory"); + asm volatile(PPC_DSSALL " ; sync" ::: "memory"); power4_idle_nap(); diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 13cad9297d82..3c097356366b 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -129,7 +129,7 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) mtspr SPRN_HID0,r4 BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */ diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 835b626cd476..7cd6ce3ec423 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -81,7 +81,7 @@ notrace long system_call_exception(long r3, long r4, long r5, { syscall_fn f; - kuep_lock(); + kuap_lock(); regs->orig_gpr3 = r3; @@ -148,7 +148,7 @@ notrace long system_call_exception(long r3, long r4, long r5, */ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) - current_thread_info()->flags |= _TIF_RESTOREALL; + set_bits(_TIF_RESTOREALL, ¤t_thread_info()->flags); /* * If the system call was made with a transaction active, doom it and @@ -181,7 +181,7 @@ notrace long system_call_exception(long r3, long r4, long r5, local_irq_enable(); - if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) { + if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) { if (unlikely(trap_is_unsupported_scv(regs))) { /* Unsupported scv vector */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); @@ -343,7 +343,7 @@ interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs) unsigned long ti_flags; again: - ti_flags = READ_ONCE(current_thread_info()->flags); + ti_flags = read_thread_flags(); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); if (ti_flags & _TIF_NEED_RESCHED) { @@ -359,7 +359,7 @@ again: do_notify_resume(regs, ti_flags); } local_irq_disable(); - ti_flags = READ_ONCE(current_thread_info()->flags); + ti_flags = read_thread_flags(); } if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { @@ -406,7 +406,6 @@ again: /* Restore user access locks last */ kuap_user_restore(regs); - kuep_unlock(); return ret; } @@ -437,7 +436,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, /* Check whether the syscall is issued inside a restartable sequence */ rseq_syscall(regs); - ti_flags = current_thread_info()->flags; + ti_flags = read_thread_flags(); if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) { if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { @@ -532,8 +531,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) unsigned long flags; unsigned long ret = 0; unsigned long kuap; - bool stack_store = current_thread_info()->flags & - _TIF_EMULATE_STACK_STORE; + bool stack_store = read_thread_flags() & _TIF_EMULATE_STACK_STORE; if (regs_is_unrecoverable(regs)) unrecoverable_exception(regs); @@ -554,7 +552,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) again: if (IS_ENABLED(CONFIG_PREEMPT)) { /* Return to preemptible kernel context */ - if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) { + if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { if (preempt_count() == 0) preempt_schedule_irq(); } diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index ec950b08a8dc..7bab2d7de372 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -30,21 +30,25 @@ COMPAT_SYS_CALL_TABLE: .ifc \srr,srr mfspr r11,SPRN_SRR0 ld r12,_NIP(r1) + clrrdi r11,r11,2 + clrrdi r12,r12,2 100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) mfspr r11,SPRN_SRR1 ld r12,_MSR(r1) 100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) .else mfspr r11,SPRN_HSRR0 ld r12,_NIP(r1) + clrrdi r11,r11,2 + clrrdi r12,r12,2 100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) mfspr r11,SPRN_HSRR1 ld r12,_MSR(r1) 100: tdne r11,r12 - EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) + EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) .endif #endif .endm @@ -162,10 +166,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * The value of AMR only matters while we're in the kernel. */ mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) + REST_GPRS(2, 3, r1) + REST_GPR(13, r1) + REST_GPR(1, r1) RFSCV_TO_USER b . /* prevent speculative execution */ @@ -183,9 +186,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtctr r3 mtlr r4 mtspr SPRN_XER,r5 - REST_10GPRS(2, r1) - REST_2GPRS(12, r1) - ld r1,GPR1(r1) + REST_GPRS(2, 13, r1) + REST_GPR(1, r1) RFI_TO_USER .Lsyscall_vectored_\name\()_rst_end: @@ -374,10 +376,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * The value of AMR only matters while we're in the kernel. */ mtcr r2 - ld r2,GPR2(r1) - ld r3,GPR3(r1) - ld r13,GPR13(r1) - ld r1,GPR1(r1) + REST_GPRS(2, 3, r1) + REST_GPR(13, r1) + REST_GPR(1, r1) RFI_TO_USER b . /* prevent speculative execution */ @@ -388,8 +389,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mtctr r3 mtspr SPRN_XER,r4 ld r0,GPR0(r1) - REST_8GPRS(4, r1) - ld r12,GPR12(r1) + REST_GPRS(4, 12, r1) b .Lsyscall_restore_regs_cont .Lsyscall_rst_end: @@ -518,17 +518,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) ld r6,_XER(r1) li r0,0 - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) - REST_GPR(13, r1) + REST_GPRS(7, 13, r1) mtcr r3 mtlr r4 mtctr r5 mtspr SPRN_XER,r6 - REST_4GPRS(2, r1) - REST_GPR(6, r1) + REST_GPRS(2, 6, r1) REST_GPR(0, r1) REST_GPR(1, r1) .ifc \srr,srr @@ -625,8 +622,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) ld r6,_CCR(r1) li r0,0 - REST_4GPRS(7, r1) - REST_2GPRS(11, r1) + REST_GPRS(7, 12, r1) mtlr r3 mtctr r4 @@ -638,7 +634,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) */ std r0,STACK_FRAME_OVERHEAD-16(r1) - REST_4GPRS(2, r1) + REST_GPRS(2, 5, r1) bne- cr1,1f /* emulate stack store */ mtcr r6 @@ -703,7 +699,7 @@ interrupt_return_macro hsrr .globl __end_soft_masked __end_soft_masked: -DEFINE_FIXED_SYMBOL(__end_soft_masked) +DEFINE_FIXED_SYMBOL(__end_soft_masked, text) #endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c4f1d6b7d992..2cf31a97126c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -745,7 +745,8 @@ void __do_irq(struct pt_regs *regs) irq = ppc_md.get_irq(); /* We can hard enable interrupts now to allow perf interrupts */ - may_hard_irq_enable(); + if (should_hard_irq_enable()) + do_hard_irq_enable(); /* And finally process it */ if (unlikely(!irq)) @@ -811,7 +812,7 @@ void __init init_IRQ(void) ppc_md.init_IRQ(); } -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x void *critirq_ctx[NR_CPUS] __read_mostly; void *dbgirq_ctx[NR_CPUS] __read_mostly; void *mcheckirq_ctx[NR_CPUS] __read_mostly; diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index bdee7262c080..9f8d0fa7b718 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -48,7 +48,7 @@ static struct hard_trap_info { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_BOOKE_OR_40x { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ #if defined(CONFIG_FSL_BOOKE) { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ @@ -67,7 +67,7 @@ static struct hard_trap_info { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ #endif -#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */ +#else /* !CONFIG_BOOKE_OR_40x */ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ #if defined(CONFIG_PPC_8xx) { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 86d77ff056a6..9a492fdec1df 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -124,7 +124,7 @@ int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; struct kprobe *prev; - struct ppc_inst insn = ppc_inst_read(p->addr); + ppc_inst_t insn = ppc_inst_read(p->addr); if ((unsigned long)p->addr & 0x03) { printk("Attempt to register kprobe at an unaligned address\n"); @@ -244,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe); static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; - struct ppc_inst insn = ppc_inst_read(p->ainsn.insn); + ppc_inst_t insn = ppc_inst_read(p->ainsn.insn); /* regs->nip is also adjusted if emulate_step returns 1 */ ret = emulate_step(regs, insn); diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 225511d73bef..f2e03ed423d0 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR) /* Stop DST streams */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL sync END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -292,7 +292,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR) isync /* Stop DST streams */ - DSSALL + PPC_DSSALL sync /* Get the current enable bit of the L3CR into r4 */ @@ -401,7 +401,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR) _GLOBAL(__flush_disable_L1) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index fd829f7f25a4..2503dd4713b9 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -586,7 +586,7 @@ void machine_check_print_event_info(struct machine_check_event *evt, mc_error_class[evt->error_class] : "Unknown"; printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype); -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /* Display faulty slb contents for SLB errors. */ if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest) slb_dump_contents(local_paca->mce_faulty_slbs); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index c2f55fe7092d..71e8f2a92e36 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -77,15 +77,15 @@ static bool mce_in_guest(void) } /* flush SLBs and reload */ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU void flush_and_reload_slb(void) { - /* Invalidate all SLBs */ - slb_flush_all_realmode(); - if (early_radix_enabled()) return; + /* Invalidate all SLBs */ + slb_flush_all_realmode(); + /* * This probably shouldn't happen, but it may be possible it's * called in early boot before SLB shadows are allocated. @@ -99,7 +99,7 @@ void flush_and_reload_slb(void) void flush_erat(void) { -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) { flush_and_reload_slb(); return; @@ -114,7 +114,7 @@ void flush_erat(void) static int mce_flush(int what) { -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (what == MCE_FLUSH_SLB) { flush_and_reload_slb(); return 1; @@ -455,7 +455,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr, * in real-mode is tricky and can lead to recursive * faults */ - struct ppc_inst instr; + ppc_inst_t instr; unsigned long pfn, instr_addr; struct instruction_op op; struct pt_regs tmp = *regs; @@ -499,8 +499,10 @@ static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1, /* attempt to correct the error */ switch (table[i].error_type) { case MCE_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU if (local_paca->in_mce == 1) slb_save_contents(local_paca->mce_faulty_slbs); +#endif handled = mce_flush(MCE_FLUSH_SLB); break; case MCE_ERROR_TYPE_ERAT: @@ -588,8 +590,10 @@ static int mce_handle_derror(struct pt_regs *regs, /* attempt to correct the error */ switch (table[i].error_type) { case MCE_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU if (local_paca->in_mce == 1) slb_save_contents(local_paca->mce_faulty_slbs); +#endif if (mce_flush(MCE_FLUSH_SLB)) handled = 1; break; diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index ed04a3ba66fe..40a583e9d3c7 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -90,16 +90,17 @@ int module_finalize(const Elf_Ehdr *hdr, } static __always_inline void * -__module_alloc(unsigned long size, unsigned long start, unsigned long end) +__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn) { pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; + gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0); /* * Don't do huge page allocations for modules yet until more testing * is done. STRICT_MODULE_RWX may require extra work to support this * too. */ - return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot, + return __vmalloc_node_range(size, 1, start, end, gfp, prot, VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, NUMA_NO_NODE, __builtin_return_address(0)); } @@ -114,13 +115,13 @@ void *module_alloc(unsigned long size) /* First try within 32M limit from _etext to avoid branch trampolines */ if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) - ptr = __module_alloc(size, limit, MODULES_END); + ptr = __module_alloc(size, limit, MODULES_END, true); if (!ptr) - ptr = __module_alloc(size, MODULES_VADDR, MODULES_END); + ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false); return ptr; #else - return __module_alloc(size, VMALLOC_START, VMALLOC_END); + return __module_alloc(size, VMALLOC_START, VMALLOC_END, false); #endif } diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index f417afc08d33..a491ad481d85 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -273,6 +273,31 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, } #ifdef CONFIG_DYNAMIC_FTRACE +int module_trampoline_target(struct module *mod, unsigned long addr, + unsigned long *target) +{ + unsigned int jmp[4]; + + /* Find where the trampoline jumps to */ + if (copy_from_kernel_nofault(jmp, (void *)addr, sizeof(jmp))) + return -EFAULT; + + /* verify that this is what we expect it to be */ + if ((jmp[0] & 0xffff0000) != PPC_RAW_LIS(_R12, 0) || + (jmp[1] & 0xffff0000) != PPC_RAW_ADDI(_R12, _R12, 0) || + jmp[2] != PPC_RAW_MTCTR(_R12) || + jmp[3] != PPC_RAW_BCTR()) + return -EINVAL; + + addr = (jmp[1] & 0xffff) | ((jmp[0] & 0xffff) << 16); + if (addr & 0x8000) + addr -= 0x10000; + + *target = addr; + + return 0; +} + int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) { module->arch.tramp = do_plt_call(module->core_layout.base, @@ -281,6 +306,14 @@ int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) if (!module->arch.tramp) return -ENOENT; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + module->arch.tramp_regs = do_plt_call(module->core_layout.base, + (unsigned long)ftrace_regs_caller, + sechdrs, module); + if (!module->arch.tramp_regs) + return -ENOENT; +#endif + return 0; } #endif diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 6baa676e7cb6..5d77d3f5fbb5 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -422,11 +422,17 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, const char *name) { long reladdr; + func_desc_t desc; + int i; if (is_mprofile_ftrace_call(name)) return create_ftrace_stub(entry, addr, me); - memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns)); + for (i = 0; i < sizeof(ppc64_stub_insns) / sizeof(u32); i++) { + if (patch_instruction(&entry->jump[i], + ppc_inst(ppc64_stub_insns[i]))) + return 0; + } /* Stub uses address relative to r2. */ reladdr = (unsigned long)entry - my_r2(sechdrs, me); @@ -437,10 +443,24 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, } pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); - entry->jump[0] |= PPC_HA(reladdr); - entry->jump[1] |= PPC_LO(reladdr); - entry->funcdata = func_desc(addr); - entry->magic = STUB_MAGIC; + if (patch_instruction(&entry->jump[0], + ppc_inst(entry->jump[0] | PPC_HA(reladdr)))) + return 0; + + if (patch_instruction(&entry->jump[1], + ppc_inst(entry->jump[1] | PPC_LO(reladdr)))) + return 0; + + // func_desc_t is 8 bytes if ABIv2, else 16 bytes + desc = func_desc(addr); + for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) { + if (patch_instruction(((u32 *)&entry->funcdata) + i, + ppc_inst(((u32 *)(&desc))[i]))) + return 0; + } + + if (patch_instruction(&entry->magic, ppc_inst(STUB_MAGIC))) + return 0; return 1; } @@ -495,8 +515,11 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me) me->name, *instruction, instruction); return 0; } + /* ld r2,R2_STACK_OFFSET(r1) */ - *instruction = PPC_INST_LD_TOC; + if (patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC))) + return 0; + return 1; } @@ -636,9 +659,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } /* Only replace bits 2 through 26 */ - *(uint32_t *)location - = (*(uint32_t *)location & ~0x03fffffc) + value = (*(uint32_t *)location & ~0x03fffffc) | (value & 0x03fffffc); + + if (patch_instruction((u32 *)location, ppc_inst(value))) + return -EFAULT; + break; case R_PPC64_REL64: diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 3c8d9bbb51cf..0d9f9cd41e13 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -540,7 +540,7 @@ static struct pstore_info nvram_pstore_info = { .write = nvram_pstore_write, }; -static int nvram_pstore_init(void) +static int __init nvram_pstore_init(void) { int rc = 0; @@ -562,7 +562,7 @@ static int nvram_pstore_init(void) return rc; } #else -static int nvram_pstore_init(void) +static int __init nvram_pstore_init(void) { return -1; } @@ -755,7 +755,7 @@ static unsigned char __init nvram_checksum(struct nvram_header *p) * Per the criteria passed via nvram_remove_partition(), should this * partition be removed? 1=remove, 0=keep */ -static int nvram_can_remove_partition(struct nvram_partition *part, +static int __init nvram_can_remove_partition(struct nvram_partition *part, const char *name, int sig, const char *exceptions[]) { if (part->header.signature != sig) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index ce1903064031..3b1c2236cbee 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -153,7 +153,7 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { - struct ppc_inst branch_op_callback, branch_emulate_step, temp; + ppc_inst_t branch_op_callback, branch_emulate_step, temp; unsigned long op_callback_addr, emulate_step_addr; kprobe_opcode_t *buff; long b_offset; @@ -228,12 +228,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) /* * 3. load instruction to be emulated into relevant register, and */ - if (IS_ENABLED(CONFIG_PPC64)) { - temp = ppc_inst_read(p->ainsn.insn); - patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); - } else { - patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX); - } + temp = ppc_inst_read(p->ainsn.insn); + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); /* * 4. branch back from trampoline @@ -269,7 +265,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op) void arch_optimize_kprobes(struct list_head *oplist) { - struct ppc_inst instr; + ppc_inst_t instr; struct optimized_kprobe *op; struct optimized_kprobe *tmp; diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index 19ea3312403c..5c7f0b4b784b 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -10,8 +10,8 @@ #include <asm/asm-offsets.h> #ifdef CONFIG_PPC64 -#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base) -#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base) +#define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base) +#define REST_30GPRS(base) REST_GPRS(2, 31, base) #define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop #else #define SAVE_30GPRS(base) stmw r2, GPR2(base) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 4208b4044d12..39da688a9455 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -139,8 +139,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit) } #endif /* CONFIG_PPC_PSERIES */ -#ifdef CONFIG_PPC_BOOK3S_64 - +#ifdef CONFIG_PPC_64S_HASH_MMU /* * 3 persistent SLBs are allocated here. The buffer will be zero * initially, hence will all be invaild until we actually write them. @@ -169,8 +168,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit) return s; } - -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_64S_HASH_MMU */ #ifdef CONFIG_PPC_PSERIES /** @@ -226,7 +224,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->kexec_state = KEXEC_STATE_NONE; new_paca->__current = &init_task; new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU new_paca->slb_shadow_ptr = NULL; #endif @@ -307,7 +305,7 @@ void __init allocate_paca(int cpu) #ifdef CONFIG_PPC_PSERIES paca->lppaca_ptr = new_lppaca(cpu, limit); #endif -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU paca->slb_shadow_ptr = new_slb_shadow(cpu, limit); #endif #ifdef CONFIG_PPC_PSERIES @@ -328,7 +326,7 @@ void __init free_unused_pacas(void) paca_nr_cpu_ids = nr_cpu_ids; paca_ptrs_size = new_ptrs_size; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU if (early_radix_enabled()) { /* Ugly fixup, see new_slb_shadow() */ memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr), @@ -341,9 +339,9 @@ void __init free_unused_pacas(void) paca_ptrs_size + paca_struct_size, nr_cpu_ids); } +#ifdef CONFIG_PPC_64S_HASH_MMU void copy_mm_to_paca(struct mm_struct *mm) { -#ifdef CONFIG_PPC_BOOK3S mm_context_t *context = &mm->context; #ifdef CONFIG_PPC_MM_SLICES @@ -356,7 +354,5 @@ void copy_mm_to_paca(struct mm_struct *mm) get_paca()->mm_ctx_user_psize = context->user_psize; get_paca()->mm_ctx_sllp = context->sllp; #endif -#else /* !CONFIG_PPC_BOOK3S */ - return; -#endif } +#endif /* CONFIG_PPC_64S_HASH_MMU */ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 6749905932f4..8bc9cf62cd93 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL(isa_mem_base); static const struct dma_map_ops *pci_dma_ops; -void set_pci_dma_ops(const struct dma_map_ops *dma_ops) +void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops) { pci_dma_ops = dma_ops; } diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index b49e1060a3bf..48537964fba1 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -37,7 +37,7 @@ int pcibios_assign_bus_offset = 1; EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(pci_dram_offset); -void pcibios_make_OF_bus_map(void); +void __init pcibios_make_OF_bus_map(void); static void fixup_cpc710_pci64(struct pci_dev* dev); static u8* pci_to_OF_bus_map; @@ -109,7 +109,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus) } } -void +void __init pcibios_make_OF_bus_map(void) { int i; diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index 877817471e3c..6a029f2378e1 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -25,7 +25,7 @@ static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes loff_t *ppos) { return simple_read_from_buffer(buf, nbytes, ppos, - PDE_DATA(file_inode(file)), PAGE_SIZE); + pde_data(file_inode(file)), PAGE_SIZE); } static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) @@ -34,7 +34,7 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) return -EINVAL; remap_pfn_range(vma, vma->vm_start, - __pa(PDE_DATA(file_inode(file))) >> PAGE_SHIFT, + __pa(pde_data(file_inode(file))) >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot); return 0; } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 406d7ee9e322..984813a4d5dc 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -628,7 +628,7 @@ static void do_break_handler(struct pt_regs *regs) { struct arch_hw_breakpoint null_brk = {0}; struct arch_hw_breakpoint *info; - struct ppc_inst instr = ppc_inst(0); + ppc_inst_t instr = ppc_inst(0); int type = 0; int size = 0; unsigned long ea; @@ -1156,6 +1156,40 @@ static inline void save_sprs(struct thread_struct *t) #endif } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void kvmppc_save_user_regs(void) +{ + unsigned long usermsr; + + if (!current->thread.regs) + return; + + usermsr = current->thread.regs->msr; + + if (usermsr & MSR_FP) + save_fpu(current); + + if (usermsr & MSR_VEC) + save_altivec(current); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (usermsr & MSR_TM) { + current->thread.tm_tfhar = mfspr(SPRN_TFHAR); + current->thread.tm_tfiar = mfspr(SPRN_TFIAR); + current->thread.tm_texasr = mfspr(SPRN_TEXASR); + current->thread.regs->msr &= ~MSR_TM; + } +#endif +} +EXPORT_SYMBOL_GPL(kvmppc_save_user_regs); + +void kvmppc_save_current_sprs(void) +{ + save_sprs(¤t->thread); +} +EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs); +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ + static inline void restore_sprs(struct thread_struct *old_thread, struct thread_struct *new_thread) { @@ -1206,7 +1240,7 @@ struct task_struct *__switch_to(struct task_struct *prev, { struct thread_struct *new_thread, *old_thread; struct task_struct *last; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU struct ppc64_tlb_batch *batch; #endif @@ -1215,7 +1249,7 @@ struct task_struct *__switch_to(struct task_struct *prev, WARN_ON(!irqs_disabled()); -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU batch = this_cpu_ptr(&ppc64_tlb_batch); if (batch->active) { current_thread_info()->local_flags |= _TLF_LAZY_MMU; @@ -1281,9 +1315,9 @@ struct task_struct *__switch_to(struct task_struct *prev, set_return_regs_changed(); /* _switch changes stack (and regs) */ -#ifdef CONFIG_PPC32 - kuap_assert_locked(); -#endif + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + kuap_assert_locked(); + last = _switch(old_thread, new_thread); /* @@ -1294,6 +1328,7 @@ struct task_struct *__switch_to(struct task_struct *prev, */ #ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU /* * This applies to a process that was context switched while inside * arch_enter_lazy_mmu_mode(), to re-activate the batch that was @@ -1305,6 +1340,7 @@ struct task_struct *__switch_to(struct task_struct *prev, batch = this_cpu_ptr(&ppc64_tlb_batch); batch->active = 1; } +#endif /* * Math facilities are masked out of the child MSR in copy_thread. @@ -1655,7 +1691,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) { -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU unsigned long sp_vsid; unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; @@ -1767,6 +1803,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) p->thread.kuap = KUAP_NONE; #endif +#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) + p->thread.pid = MMU_NO_CONTEXT; +#endif setup_ksp_vsid(p, sp); @@ -2299,10 +2338,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) * the heap, we can put it above 1TB so it is backed by a 1TB * segment. Otherwise the heap will be in the bottom 1TB * which always uses 256MB segments and this may result in a - * performance penalty. We don't need to worry about radix. For - * radix, mmu_highuser_ssize remains unchanged from 256MB. + * performance penalty. */ - if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) + if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T); #endif diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 2e67588f6f6e..3d30d40a0e9c 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -231,7 +231,7 @@ static void __init check_cpu_pa_features(unsigned long node) ibm_pa_features, ARRAY_SIZE(ibm_pa_features)); } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU static void __init init_mmu_slb_size(unsigned long node) { const __be32 *slb_size_ptr; @@ -402,7 +402,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */ /* Use common scan routine to determine if this is the chosen node */ - if (early_init_dt_scan_chosen(node, uname, depth, data) == 0) + if (early_init_dt_scan_chosen(data) < 0) return 0; #ifdef CONFIG_PPC64 @@ -447,7 +447,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node, */ #ifdef CONFIG_SPARSEMEM -static bool validate_mem_limit(u64 base, u64 *size) +static bool __init validate_mem_limit(u64 base, u64 *size) { u64 max_mem = 1UL << (MAX_PHYSMEM_BITS); @@ -458,7 +458,7 @@ static bool validate_mem_limit(u64 base, u64 *size) return true; } #else -static bool validate_mem_limit(u64 base, u64 *size) +static bool __init validate_mem_limit(u64 base, u64 *size) { return true; } @@ -532,19 +532,18 @@ static int __init early_init_drmem_lmb(struct drmem_lmb *lmb, } #endif /* CONFIG_PPC_PSERIES */ -static int __init early_init_dt_scan_memory_ppc(unsigned long node, - const char *uname, - int depth, void *data) +static int __init early_init_dt_scan_memory_ppc(void) { #ifdef CONFIG_PPC_PSERIES - if (depth == 1 && - strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) { + const void *fdt = initial_boot_params; + int node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory"); + + if (node > 0) walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb); - return 0; - } + #endif - - return early_init_dt_scan_memory(node, uname, depth, data); + + return early_init_dt_scan_memory(); } /* @@ -748,8 +747,8 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); /* Scan memory nodes and rebuild MEMBLOCKs */ - of_scan_flat_dt(early_init_dt_scan_root, NULL); - of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); + early_init_dt_scan_root(); + early_init_dt_scan_memory_ppc(); parse_early_param(); @@ -857,8 +856,8 @@ void __init early_get_first_memblock_info(void *params, phys_addr_t *size) * mess the memblock. */ add_mem_to_memblock = 0; - of_scan_flat_dt(early_init_dt_scan_root, NULL); - of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); + early_init_dt_scan_root(); + early_init_dt_scan_memory_ppc(); add_mem_to_memblock = 1; if (size) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 18b04b08b983..0ac5faacc909 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -672,7 +672,7 @@ static inline int __init prom_getproplen(phandle node, const char *pname) return call_prom("getproplen", 2, 1, node, ADDR(pname)); } -static void add_string(char **str, const char *q) +static void __init add_string(char **str, const char *q) { char *p = *str; @@ -682,7 +682,7 @@ static void add_string(char **str, const char *q) *str = p; } -static char *tohex(unsigned int x) +static char *__init tohex(unsigned int x) { static const char digits[] __initconst = "0123456789abcdef"; static char result[9] __prombss; @@ -728,7 +728,7 @@ static int __init prom_setprop(phandle node, const char *nodename, #define prom_islower(c) ('a' <= (c) && (c) <= 'z') #define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c)) -static unsigned long prom_strtoul(const char *cp, const char **endp) +static unsigned long __init prom_strtoul(const char *cp, const char **endp) { unsigned long result = 0, base = 10, value; @@ -753,7 +753,7 @@ static unsigned long prom_strtoul(const char *cp, const char **endp) return result; } -static unsigned long prom_memparse(const char *ptr, const char **retptr) +static unsigned long __init prom_memparse(const char *ptr, const char **retptr) { unsigned long ret = prom_strtoul(ptr, retptr); int shift = 0; @@ -1786,7 +1786,7 @@ static void __init prom_close_stdin(void) } #ifdef CONFIG_PPC_SVM -static int prom_rtas_hcall(uint64_t args) +static int __init prom_rtas_hcall(uint64_t args) { register uint64_t arg1 asm("r3") = H_RTAS; register uint64_t arg2 asm("r4") = args; @@ -2991,7 +2991,7 @@ static void __init fixup_device_tree_efika_add_phy(void) /* Check if the phy-handle property exists - bail if it does */ rv = prom_getprop(node, "phy-handle", prop, sizeof(prop)); - if (!rv) + if (rv <= 0) return; /* @@ -3248,7 +3248,7 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) /* * Perform the Enter Secure Mode ultracall. */ -static int enter_secure_mode(unsigned long kbase, unsigned long fdt) +static int __init enter_secure_mode(unsigned long kbase, unsigned long fdt) { register unsigned long r3 asm("r3") = UV_ESM; register unsigned long r4 asm("r4") = kbase; diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 7c7093c17c45..c43f77e2ac31 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -260,8 +260,7 @@ long do_syscall_trace_enter(struct pt_regs *regs) { u32 flags; - flags = READ_ONCE(current_thread_info()->flags) & - (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); + flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); if (flags) { int rc = tracehook_report_syscall_entry(regs); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index ff80bbad22a5..733e6ef36758 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -492,8 +492,25 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) } EXPORT_SYMBOL(rtas_call); -/* For RTAS_BUSY (-2), delay for 1 millisecond. For an extended busy status - * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds. +/** + * rtas_busy_delay_time() - From an RTAS status value, calculate the + * suggested delay time in milliseconds. + * + * @status: a value returned from rtas_call() or similar APIs which return + * the status of a RTAS function call. + * + * Context: Any context. + * + * Return: + * * 100000 - If @status is 9905. + * * 10000 - If @status is 9904. + * * 1000 - If @status is 9903. + * * 100 - If @status is 9902. + * * 10 - If @status is 9901. + * * 1 - If @status is either 9900 or -2. This is "wrong" for -2, but + * some callers depend on this behavior, and the worst outcome + * is that they will delay for longer than necessary. + * * 0 - If @status is not a busy or extended delay value. */ unsigned int rtas_busy_delay_time(int status) { @@ -513,17 +530,77 @@ unsigned int rtas_busy_delay_time(int status) } EXPORT_SYMBOL(rtas_busy_delay_time); -/* For an RTAS busy status code, perform the hinted delay. */ -unsigned int rtas_busy_delay(int status) +/** + * rtas_busy_delay() - helper for RTAS busy and extended delay statuses + * + * @status: a value returned from rtas_call() or similar APIs which return + * the status of a RTAS function call. + * + * Context: Process context. May sleep or schedule. + * + * Return: + * * true - @status is RTAS_BUSY or an extended delay hint. The + * caller may assume that the CPU has been yielded if necessary, + * and that an appropriate delay for @status has elapsed. + * Generally the caller should reattempt the RTAS call which + * yielded @status. + * + * * false - @status is not @RTAS_BUSY nor an extended delay hint. The + * caller is responsible for handling @status. + */ +bool rtas_busy_delay(int status) { unsigned int ms; + bool ret; - might_sleep(); - ms = rtas_busy_delay_time(status); - if (ms && need_resched()) - msleep(ms); + switch (status) { + case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: + ret = true; + ms = rtas_busy_delay_time(status); + /* + * The extended delay hint can be as high as 100 seconds. + * Surely any function returning such a status is either + * buggy or isn't going to be significantly slowed by us + * polling at 1HZ. Clamp the sleep time to one second. + */ + ms = clamp(ms, 1U, 1000U); + /* + * The delay hint is an order-of-magnitude suggestion, not + * a minimum. It is fine, possibly even advantageous, for + * us to pause for less time than hinted. For small values, + * use usleep_range() to ensure we don't sleep much longer + * than actually needed. + * + * See Documentation/timers/timers-howto.rst for + * explanation of the threshold used here. In effect we use + * usleep_range() for 9900 and 9901, msleep() for + * 9902-9905. + */ + if (ms <= 20) + usleep_range(ms * 100, ms * 1000); + else + msleep(ms); + break; + case RTAS_BUSY: + ret = true; + /* + * We should call again immediately if there's no other + * work to do. + */ + cond_resched(); + break; + default: + ret = false; + /* + * Not a busy or extended delay status; the caller should + * handle @status itself. Ensure we warn on misuses in + * atomic context regardless. + */ + might_sleep(); + break; + } - return ms; + return ret; } EXPORT_SYMBOL(rtas_busy_delay); @@ -809,13 +886,13 @@ void rtas_os_term(char *str) /** * rtas_activate_firmware() - Activate a new version of firmware. * + * Context: This function may sleep. + * * Activate a new version of partition firmware. The OS must call this * after resuming from a partition hibernation or migration in order * to maintain the ability to perform live firmware updates. It's not * catastrophic for this method to be absent or to fail; just log the * condition in that case. - * - * Context: This function may sleep. */ void rtas_activate_firmware(void) { @@ -890,11 +967,12 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...) #endif /* CONFIG_PPC_PSERIES */ /** - * Find a specific pseries error log in an RTAS extended event log. + * get_pseries_errorlog() - Find a specific pseries error log in an RTAS + * extended event log. * @log: RTAS error/event log * @section_id: two character section identifier * - * Returns a pointer to the specified errorlog or NULL if not found. + * Return: A pointer to the specified errorlog or NULL if not found. */ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, uint16_t section_id) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 32ee17753eb4..cf0f42909ddf 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -455,7 +455,7 @@ static void rtas_event_scan(struct work_struct *w) } #ifdef CONFIG_PPC64 -static void retrieve_nvram_error_log(void) +static void __init retrieve_nvram_error_log(void) { unsigned int err_type ; int rc ; @@ -473,12 +473,12 @@ static void retrieve_nvram_error_log(void) } } #else /* CONFIG_PPC64 */ -static void retrieve_nvram_error_log(void) +static void __init retrieve_nvram_error_log(void) { } #endif /* CONFIG_PPC64 */ -static void start_event_scan(void) +static void __init start_event_scan(void) { printk(KERN_DEBUG "RTAS daemon started\n"); pr_debug("rtasd: will sleep for %d milliseconds\n", diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 15fb5ea1b9ea..e159d4093d98 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -44,7 +44,7 @@ static void enable_barrier_nospec(bool enable) do_barrier_nospec_fixups(enable); } -void setup_barrier_nospec(void) +void __init setup_barrier_nospec(void) { bool enable; @@ -132,7 +132,7 @@ early_param("nospectre_v2", handle_nospectre_v2); #endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_PPC_FSL_BOOK3E -void setup_spectre_v2(void) +void __init setup_spectre_v2(void) { if (no_spectrev2 || cpu_mitigations_off()) do_btb_flush_fixups(); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4f1322b65760..f8da937df918 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -582,7 +582,7 @@ static __init int add_pcspkr(void) device_initcall(add_pcspkr); #endif /* CONFIG_PCSPKR_PLATFORM */ -void probe_machine(void) +static __init void probe_machine(void) { extern struct machdep_calls __machine_desc_start; extern struct machdep_calls __machine_desc_end; diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 84058bbc8fe9..93f22da12abe 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -29,7 +29,7 @@ void setup_tlb_core_data(void); static inline void setup_tlb_core_data(void) { } #endif -#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x void exc_lvl_early_init(void); #else static inline void exc_lvl_early_init(void) { } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 7ec5c47fce0e..a6e9d36d7c01 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -75,7 +75,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE); notrace void __init machine_init(u64 dt_ptr) { u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache); - struct ppc_inst insn; + ppc_inst_t insn; /* Configure static keys first, now that we're relocated. */ setup_feature_keys(); @@ -175,7 +175,7 @@ void __init emergency_stack_init(void) } #endif -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x void __init exc_lvl_early_init(void) { unsigned int i, hw_cpu; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6052f5d5ded3..be8577ac9397 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -499,7 +499,7 @@ void smp_release_cpus(void) * routines and/or provided to userland */ -static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, +static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, u32 bsize, u32 sets) { info->size = size; @@ -771,50 +771,6 @@ void __init emergency_stack_init(void) } #ifdef CONFIG_SMP -/** - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu - * @cpu: cpu to allocate for - * @size: size allocation in bytes - * @align: alignment - * - * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper - * does the right thing for NUMA regardless of the current - * configuration. - * - * RETURNS: - * Pointer to the allocated area on success, NULL on failure. - */ -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, - size_t align) -{ - const unsigned long goal = __pa(MAX_DMA_ADDRESS); -#ifdef CONFIG_NUMA - int node = early_cpu_to_node(cpu); - void *ptr; - - if (!node_online(node) || !NODE_DATA(node)) { - ptr = memblock_alloc_from(size, align, goal); - pr_info("cpu %d has no node %d or node-local memory\n", - cpu, node); - pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", - cpu, size, __pa(ptr)); - } else { - ptr = memblock_alloc_try_nid(size, align, goal, - MEMBLOCK_ALLOC_ACCESSIBLE, node); - pr_debug("per cpu data for cpu%d %lu bytes on node%d at " - "%016lx\n", cpu, size, node, __pa(ptr)); - } - return ptr; -#else - return memblock_alloc_from(size, align, goal); -#endif -} - -static void __init pcpu_free_bootmem(void *ptr, size_t size) -{ - memblock_free(ptr, size); -} - static int pcpu_cpu_distance(unsigned int from, unsigned int to) { if (early_cpu_to_node(from) == early_cpu_to_node(to)) @@ -823,53 +779,13 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(__per_cpu_offset); - -static void __init pcpu_populate_pte(unsigned long addr) +static __init int pcpu_cpu_to_node(int cpu) { - pgd_t *pgd = pgd_offset_k(addr); - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - - p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) { - pud_t *new; - - new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); - if (!new) - goto err_alloc; - p4d_populate(&init_mm, p4d, new); - } - - pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { - pmd_t *new; - - new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); - if (!new) - goto err_alloc; - pud_populate(&init_mm, pud, new); - } - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { - pte_t *new; - - new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); - if (!new) - goto err_alloc; - pmd_populate_kernel(&init_mm, pmd, new); - } - - return; - -err_alloc: - panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", - __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); + return early_cpu_to_node(cpu); } +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); void __init setup_per_cpu_areas(void) { @@ -880,18 +796,27 @@ void __init setup_per_cpu_areas(void) int rc = -EINVAL; /* - * Linear mapping is one of 4K, 1M and 16M. For 4K, no need - * to group units. For larger mappings, use 1M atom which - * should be large enough to contain a number of units. + * BookE and BookS radix are historical values and should be revisited. */ - if (mmu_linear_psize == MMU_PAGE_4K) + if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { + atom_size = SZ_1M; + } else if (radix_enabled()) { atom_size = PAGE_SIZE; - else - atom_size = 1 << 20; + } else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) { + /* + * Linear mapping is one of 4K, 1M and 16M. For 4K, no need + * to group units. For larger mappings, use 1M atom which + * should be large enough to contain a number of units. + */ + if (mmu_linear_psize == MMU_PAGE_4K) + atom_size = PAGE_SIZE; + else + atom_size = SZ_1M; + } if (pcpu_chosen_fc != PCPU_FC_PAGE) { rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, - pcpu_alloc_bootmem, pcpu_free_bootmem); + pcpu_cpu_to_node); if (rc) pr_warn("PERCPU: %s allocator failed (%d), " "falling back to page size\n", @@ -899,8 +824,7 @@ void __init setup_per_cpu_areas(void) } if (rc < 0) - rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem, - pcpu_populate_pte); + rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node); if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index 1f07317964e4..618aeccdf691 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -25,8 +25,14 @@ static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src) return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]); } -#define unsafe_get_user_sigset(dst, src, label) \ - unsafe_get_user((dst)->sig[0], (u64 __user *)&(src)->sig[0], label) +#define unsafe_get_user_sigset(dst, src, label) do { \ + sigset_t *__dst = dst; \ + const sigset_t __user *__src = src; \ + int i; \ + \ + for (i = 0; i < _NSIG_WORDS; i++) \ + unsafe_get_user(__dst->sig[i], &__src->sig[i], label); \ +} while (0) #ifdef CONFIG_VSX extern unsigned long copy_vsx_to_user(void __user *to, diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 00a9c9cd6d42..d84c434b2b78 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -527,16 +527,20 @@ static long restore_user_regs(struct pt_regs *regs, regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1)); #ifdef CONFIG_SPE - /* force the process to reload the spe registers from - current->thread when it next does spe instructions */ + /* + * Force the process to reload the spe registers from + * current->thread when it next does spe instructions. + * Since this is user ABI, we must enforce the sizing. + */ + BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32)); regs_set_return_msr(regs, regs->msr & ~MSR_SPE); if (msr & MSR_SPE) { /* restore spe registers from the stack */ - unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, - ELF_NEVRREG * sizeof(u32), failed); + unsafe_copy_from_user(¤t->thread.spe, &sr->mc_vregs, + sizeof(current->thread.spe), failed); current->thread.used_spe = true; } else if (current->thread.used_spe) - memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); + memset(¤t->thread.spe, 0, sizeof(current->thread.spe)); /* Always get SPEFSCR back */ unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); @@ -1063,7 +1067,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, * We kill the task with a SIGSEGV in this situation. */ if (do_setcontext(new_ctx, regs, 0)) { - force_fatal_sig(SIGSEGV); + force_exit_sig(SIGSEGV); return -EFAULT; } diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index ef518535d436..d1e1fc0acbea 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -704,7 +704,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, */ if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) { - force_fatal_sig(SIGSEGV); + force_exit_sig(SIGSEGV); return -EFAULT; } set_current_blocked(&set); @@ -713,7 +713,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, return -EFAULT; if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) { user_read_access_end(); - force_fatal_sig(SIGSEGV); + force_exit_sig(SIGSEGV); return -EFAULT; } user_read_access_end(); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index c23ee842c4c3..b7fd6a72aa76 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -61,6 +61,7 @@ #include <asm/cpu_has_feature.h> #include <asm/ftrace.h> #include <asm/kup.h> +#include <asm/fadump.h> #ifdef DEBUG #include <asm/udbg.h> @@ -621,6 +622,45 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) #endif #ifdef CONFIG_NMI_IPI +static void crash_stop_this_cpu(struct pt_regs *regs) +#else +static void crash_stop_this_cpu(void *dummy) +#endif +{ + /* + * Just busy wait here and avoid marking CPU as offline to ensure + * register data is captured appropriately. + */ + while (1) + cpu_relax(); +} + +void crash_smp_send_stop(void) +{ + static bool stopped = false; + + /* + * In case of fadump, register data for all CPUs is captured by f/w + * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before + * this rtas call to avoid tricky post processing of those CPUs' + * backtraces. + */ + if (should_fadump_crash()) + return; + + if (stopped) + return; + + stopped = true; + +#ifdef CONFIG_NMI_IPI + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); +#else + smp_call_function(crash_stop_this_cpu, NULL, 0); +#endif /* CONFIG_NMI_IPI */ +} + +#ifdef CONFIG_NMI_IPI static void nmi_stop_this_cpu(struct pt_regs *regs) { /* @@ -896,7 +936,8 @@ out: return tg; } -static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start) +static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, + int cpu, int cpu_group_start) { int first_thread = cpu_first_thread_sibling(cpu); int i; @@ -1635,12 +1676,14 @@ void start_secondary(void *unused) BUG(); } +#ifdef CONFIG_PROFILING int setup_profiling_timer(unsigned int multiplier) { return 0; } +#endif -static void fixup_topology(void) +static void __init fixup_topology(void) { int i; diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index f73f4d72fea4..e0cbd63007f2 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume) #ifdef CONFIG_ALTIVEC /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif sync diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 96bb20715aa9..9f1903c7f540 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -141,7 +141,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR) _GLOBAL(swsusp_arch_resume) /* Stop pending alitvec streams and memory accesses */ BEGIN_FTR_SECTION - DSSALL + PPC_DSSALL END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 7bef917cc84e..2600b4237292 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -528,3 +528,5 @@ 446 common landlock_restrict_self sys_landlock_restrict_self # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv +450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 08d8072d6e7a..d45a415d5374 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -214,7 +214,7 @@ static ssize_t __used store_dscr_default(struct device *dev, static DEVICE_ATTR(dscr_default, 0600, show_dscr_default, store_dscr_default); -static void sysfs_create_dscr_default(void) +static void __init sysfs_create_dscr_default(void) { if (cpu_has_feature(CPU_FTR_DSCR)) { int cpu; @@ -744,12 +744,12 @@ static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char } static DEVICE_ATTR(svm, 0444, show_svm, NULL); -static void create_svm_file(void) +static void __init create_svm_file(void) { device_create_file(cpu_subsys.dev_root, &dev_attr_svm); } #else -static void create_svm_file(void) +static void __init create_svm_file(void) { } #endif /* CONFIG_PPC_SVM */ @@ -1110,7 +1110,7 @@ EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group); /* NUMA stuff */ #ifdef CONFIG_NUMA -static void register_nodes(void) +static void __init register_nodes(void) { int i; @@ -1134,7 +1134,7 @@ void sysfs_remove_device_from_node(struct device *dev, int nid) EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node); #else -static void register_nodes(void) +static void __init register_nodes(void) { return; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cae8f03a44fe..cd0b8b71ecdd 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -88,6 +88,7 @@ static struct clocksource clocksource_timebase = { #define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF u64 decrementer_max = DECREMENTER_DEFAULT_MAX; +EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */ static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev); @@ -107,6 +108,7 @@ struct clock_event_device decrementer_clockevent = { EXPORT_SYMBOL(decrementer_clockevent); DEFINE_PER_CPU(u64, decrementers_next_tb); +EXPORT_SYMBOL_GPL(decrementers_next_tb); static DEFINE_PER_CPU(struct clock_event_device, decrementers); #define XSEC_PER_SEC (1024*1024) @@ -496,6 +498,16 @@ EXPORT_SYMBOL(profile_pc); * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... */ #ifdef CONFIG_PPC64 +static inline unsigned long test_irq_work_pending(void) +{ + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, irq_work_pending))); + return x; +} + static inline void set_irq_work_pending_flag(void) { asm volatile("stb %0,%1(13)" : : @@ -539,13 +551,44 @@ void arch_irq_work_raise(void) preempt_enable(); } +static void set_dec_or_work(u64 val) +{ + set_dec(val); + /* We may have raced with new irq work */ + if (unlikely(test_irq_work_pending())) + set_dec(1); +} + #else /* CONFIG_IRQ_WORK */ #define test_irq_work_pending() 0 #define clear_irq_work_pending() +static void set_dec_or_work(u64 val) +{ + set_dec(val); +} #endif /* CONFIG_IRQ_WORK */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void timer_rearm_host_dec(u64 now) +{ + u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + + WARN_ON_ONCE(!arch_irqs_disabled()); + WARN_ON_ONCE(mfmsr() & MSR_EE); + + if (now >= *next_tb) { + local_paca->irq_happened |= PACA_IRQ_DEC; + } else { + now = *next_tb - now; + if (now <= decrementer_max) + set_dec_or_work(now); + } +} +EXPORT_SYMBOL_GPL(timer_rearm_host_dec); +#endif + /* * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. @@ -566,22 +609,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) return; } - /* Ensure a positive value is written to the decrementer, or else - * some CPUs will continue to take decrementer exceptions. When the - * PPC_WATCHDOG (decrementer based) is configured, keep this at most - * 31 bits, which is about 4 seconds on most systems, which gives - * the watchdog a chance of catching timer interrupt hard lockups. - */ - if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) - set_dec(0x7fffffff); - else - set_dec(decrementer_max); - - /* Conditionally hard-enable interrupts now that the DEC has been - * bumped to its maximum value - */ - may_hard_irq_enable(); + /* Conditionally hard-enable interrupts. */ + if (should_hard_irq_enable()) { + /* + * Ensure a positive value is written to the decrementer, or + * else some CPUs will continue to take decrementer exceptions. + * When the PPC_WATCHDOG (decrementer based) is configured, + * keep this at most 31 bits, which is about 4 seconds on most + * systems, which gives the watchdog a chance of catching timer + * interrupt hard lockups. + */ + if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) + set_dec(0x7fffffff); + else + set_dec(decrementer_max); + do_hard_irq_enable(); + } #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) @@ -605,11 +649,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; - if (now <= decrementer_max) - set_dec(now); - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); + if (now > decrementer_max) + now = decrementer_max; + set_dec_or_work(now); __this_cpu_inc(irq_stat.timer_irqs_others); } @@ -730,7 +772,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val) static void start_cpu_decrementer(void) { -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x unsigned int tcr; /* Clear any pending timer interrupts */ @@ -843,11 +885,7 @@ static int decrementer_set_next_event(unsigned long evt, struct clock_event_device *dev) { __this_cpu_write(decrementers_next_tb, get_tb() + evt); - set_dec(evt); - - /* We may have raced with new irq work */ - if (test_irq_work_pending()) - set_dec(1); + set_dec_or_work(evt); return 0; } diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 2b91f233b05d..3beecc32940b 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -226,11 +226,8 @@ _GLOBAL(tm_reclaim) /* Sync the userland GPRs 2-12, 14-31 to thread->regs: */ SAVE_GPR(0, r7) /* user r0 */ - SAVE_GPR(2, r7) /* user r2 */ - SAVE_4GPRS(3, r7) /* user r3-r6 */ - SAVE_GPR(8, r7) /* user r8 */ - SAVE_GPR(9, r7) /* user r9 */ - SAVE_GPR(10, r7) /* user r10 */ + SAVE_GPRS(2, 6, r7) /* user r2-r6 */ + SAVE_GPRS(8, 10, r7) /* user r8-r10 */ ld r3, GPR1(r1) /* user r1 */ ld r4, GPR7(r1) /* user r7 */ ld r5, GPR11(r1) /* user r11 */ @@ -445,12 +442,8 @@ restore_gprs: ld r6, THREAD_TM_PPR(r3) REST_GPR(0, r7) /* GPR0 */ - REST_2GPRS(2, r7) /* GPR2-3 */ - REST_GPR(4, r7) /* GPR4 */ - REST_4GPRS(8, r7) /* GPR8-11 */ - REST_2GPRS(12, r7) /* GPR12-13 */ - - REST_NVGPRS(r7) /* GPR14-31 */ + REST_GPRS(2, 4, r7) /* GPR2-4 */ + REST_GPRS(8, 31, r7) /* GPR8-31 */ /* Load up PPR and DSCR here so we don't run with user values for long */ mtspr SPRN_DSCR, r5 diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d89c5df4f206..80b6285769f2 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -41,10 +41,10 @@ #define NUM_FTRACE_TRAMPS 8 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; -static struct ppc_inst +static ppc_inst_t ftrace_call_replace(unsigned long ip, unsigned long addr, int link) { - struct ppc_inst op; + ppc_inst_t op; addr = ppc_function_entry((void *)addr); @@ -55,9 +55,9 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link) } static int -ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) +ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) { - struct ppc_inst replaced; + ppc_inst_t replaced; /* * Note: @@ -90,24 +90,24 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) */ static int test_24bit_addr(unsigned long ip, unsigned long addr) { - struct ppc_inst op; + ppc_inst_t op; addr = ppc_function_entry((void *)addr); /* use the create_branch to verify that this offset can be branched */ return create_branch(&op, (u32 *)ip, addr, 0) == 0; } -static int is_bl_op(struct ppc_inst op) +static int is_bl_op(ppc_inst_t op) { return (ppc_inst_val(op) & 0xfc000003) == 0x48000001; } -static int is_b_op(struct ppc_inst op) +static int is_b_op(ppc_inst_t op) { return (ppc_inst_val(op) & 0xfc000003) == 0x48000000; } -static unsigned long find_bl_target(unsigned long ip, struct ppc_inst op) +static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) { int offset; @@ -127,7 +127,7 @@ __ftrace_make_nop(struct module *mod, { unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - struct ppc_inst op, pop; + ppc_inst_t op, pop; /* read where this goes */ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { @@ -221,10 +221,9 @@ static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - struct ppc_inst op; - unsigned int jmp[4]; + ppc_inst_t op; unsigned long ip = rec->ip; - unsigned long tramp; + unsigned long tramp, ptr; if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE)) return -EFAULT; @@ -238,41 +237,13 @@ __ftrace_make_nop(struct module *mod, /* lets find where the pointer goes */ tramp = find_bl_target(ip, op); - /* - * On PPC32 the trampoline looks like: - * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha - * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l - * 0x7d, 0x89, 0x03, 0xa6 mtctr r12 - * 0x4e, 0x80, 0x04, 0x20 bctr - */ - - pr_devel("ip:%lx jumps to %lx", ip, tramp); - /* Find where the trampoline jumps to */ - if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) { - pr_err("Failed to read %lx\n", tramp); + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); return -EFAULT; } - pr_devel(" %08x %08x ", jmp[0], jmp[1]); - - /* verify that this is what we expect it to be */ - if (((jmp[0] & 0xffff0000) != 0x3d800000) || - ((jmp[1] & 0xffff0000) != 0x398c0000) || - (jmp[2] != 0x7d8903a6) || - (jmp[3] != 0x4e800420)) { - pr_err("Not a trampoline\n"); - return -EINVAL; - } - - tramp = (jmp[1] & 0xffff) | - ((jmp[0] & 0xffff) << 16); - if (tramp & 0x8000) - tramp -= 0x10000; - - pr_devel(" %lx ", tramp); - - if (tramp != addr) { + if (ptr != addr) { pr_err("Trampoline location %08lx does not match addr\n", tramp); return -EINVAL; @@ -291,7 +262,7 @@ __ftrace_make_nop(struct module *mod, static unsigned long find_ftrace_tramp(unsigned long ip) { int i; - struct ppc_inst instr; + ppc_inst_t instr; /* * We have the compiler generated long_branch tramps at the end @@ -329,9 +300,9 @@ static int add_ftrace_tramp(unsigned long tramp) static int setup_mcount_compiler_tramp(unsigned long tramp) { int i; - struct ppc_inst op; + ppc_inst_t op; unsigned long ptr; - struct ppc_inst instr; + ppc_inst_t instr; static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS]; /* Is this a known long jump tramp? */ @@ -396,7 +367,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) { unsigned long tramp, ip = rec->ip; - struct ppc_inst op; + ppc_inst_t op; /* Read where this goes */ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { @@ -436,7 +407,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; - struct ppc_inst old, new; + ppc_inst_t old, new; /* * If the calling address is more that 24 bits away, @@ -489,7 +460,7 @@ int ftrace_make_nop(struct module *mod, */ #ifndef CONFIG_MPROFILE_KERNEL static int -expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) +expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) { /* * We expect to see: @@ -507,7 +478,7 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) } #else static int -expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) +expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) { /* look for patched "NOP" on ppc64 with -mprofile-kernel */ if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()))) @@ -519,8 +490,8 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - struct ppc_inst op[2]; - struct ppc_inst instr; + ppc_inst_t op[2]; + ppc_inst_t instr; void *ip = (void *)rec->ip; unsigned long entry, ptr, tramp; struct module *mod = rec->arch.mod; @@ -588,8 +559,10 @@ static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { int err; - struct ppc_inst op; + ppc_inst_t op; u32 *ip = (u32 *)rec->ip; + struct module *mod = rec->arch.mod; + unsigned long tramp; /* read where this goes */ if (copy_inst_from_kernel_nofault(&op, ip)) @@ -602,13 +575,23 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } /* If we never set up a trampoline to ftrace_caller, then bail */ - if (!rec->arch.mod->arch.tramp) { +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (!mod->arch.tramp || !mod->arch.tramp_regs) { +#else + if (!mod->arch.tramp) { +#endif pr_err("No ftrace trampoline\n"); return -EINVAL; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + if (rec->flags & FTRACE_FL_REGS) + tramp = mod->arch.tramp_regs; + else +#endif + tramp = mod->arch.tramp; /* create the branch to the trampoline */ - err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK); + err = create_branch(&op, ip, tramp, BRANCH_SET_LINK); if (err) { pr_err("REL24 out of range!\n"); return -EINVAL; @@ -626,7 +609,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) { - struct ppc_inst op; + ppc_inst_t op; void *ip = (void *)rec->ip; unsigned long tramp, entry, ptr; @@ -674,7 +657,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long ip = rec->ip; - struct ppc_inst old, new; + ppc_inst_t old, new; /* * If the calling address is more that 24 bits away, @@ -713,7 +696,7 @@ static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - struct ppc_inst op; + ppc_inst_t op; unsigned long ip = rec->ip; unsigned long entry, ptr, tramp; struct module *mod = rec->arch.mod; @@ -807,7 +790,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { unsigned long ip = rec->ip; - struct ppc_inst old, new; + ppc_inst_t old, new; /* * If the calling address is more that 24 bits away, @@ -847,7 +830,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); - struct ppc_inst old, new; + ppc_inst_t old, new; int ret; old = ppc_inst_read((u32 *)&ftrace_call); @@ -932,7 +915,7 @@ int ftrace_enable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); - struct ppc_inst old, new; + ppc_inst_t old, new; old = ftrace_call_replace(ip, stub, 0); new = ftrace_call_replace(ip, addr, 0); @@ -945,7 +928,7 @@ int ftrace_disable_ftrace_graph_caller(void) unsigned long ip = (unsigned long)(&ftrace_graph_call); unsigned long addr = (unsigned long)(&ftrace_graph_caller); unsigned long stub = (unsigned long)(&ftrace_graph_stub); - struct ppc_inst old, new; + ppc_inst_t old, new; old = ftrace_call_replace(ip, addr, 0); new = ftrace_call_replace(ip, stub, 0); diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index e023ae59c429..0a02c0cb12d9 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -9,55 +9,135 @@ #include <asm/asm-offsets.h> #include <asm/ftrace.h> #include <asm/export.h> +#include <asm/ptrace.h> _GLOBAL(mcount) _GLOBAL(_mcount) /* * It is required that _mcount on PPC32 must preserve the - * link register. But we have r0 to play with. We use r0 + * link register. But we have r12 to play with. We use r12 * to push the return address back to the caller of mcount * into the ctr register, restore the link register and * then jump back using the ctr register. */ - mflr r0 - mtctr r0 - lwz r0, 4(r1) + mflr r12 + mtctr r12 mtlr r0 bctr +EXPORT_SYMBOL(_mcount) _GLOBAL(ftrace_caller) MCOUNT_SAVE_FRAME /* r3 ends up with link register */ subi r3, r3, MCOUNT_INSN_SIZE + lis r5,function_trace_op@ha + lwz r5,function_trace_op@l(r5) + li r6, 0 .globl ftrace_call ftrace_call: bl ftrace_stub nop + MCOUNT_RESTORE_FRAME +ftrace_caller_common: #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl ftrace_graph_call ftrace_graph_call: b ftrace_graph_stub _GLOBAL(ftrace_graph_stub) #endif - MCOUNT_RESTORE_FRAME /* old link register ends up in ctr reg */ bctr -EXPORT_SYMBOL(_mcount) _GLOBAL(ftrace_stub) blr +_GLOBAL(ftrace_regs_caller) + /* Save the original return address in A's stack frame */ + stw r0,LRSAVE(r1) + + /* Create our stack frame + pt_regs */ + stwu r1,-INT_FRAME_SIZE(r1) + + /* Save all gprs to pt_regs */ + stw r0, GPR0(r1) + stmw r2, GPR2(r1) + + /* Save previous stack pointer (r1) */ + addi r8, r1, INT_FRAME_SIZE + stw r8, GPR1(r1) + + /* Load special regs for save below */ + mfmsr r8 + mfctr r9 + mfxer r10 + mfcr r11 + + /* Get the _mcount() call site out of LR */ + mflr r7 + /* Save it as pt_regs->nip */ + stw r7, _NIP(r1) + /* Save the read LR in pt_regs->link */ + stw r0, _LINK(r1) + + lis r3,function_trace_op@ha + lwz r5,function_trace_op@l(r3) + + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + + /* Save special regs */ + stw r8, _MSR(r1) + stw r9, _CTR(r1) + stw r10, _XER(r1) + stw r11, _CCR(r1) + + /* Load &pt_regs in r6 for call below */ + addi r6, r1, STACK_FRAME_OVERHEAD + + /* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_regs_call +ftrace_regs_call: + bl ftrace_stub + nop + + /* Load ctr with the possibly modified NIP */ + lwz r3, _NIP(r1) + mtctr r3 + + /* Restore gprs */ + lmw r2, GPR2(r1) + + /* Restore possibly modified LR */ + lwz r0, _LINK(r1) + mtlr r0 + + /* Pop our stack frame */ + addi r1, r1, INT_FRAME_SIZE + + b ftrace_caller_common + #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(ftrace_graph_caller) + stwu r1,-48(r1) + stw r3, 12(r1) + stw r4, 16(r1) + stw r5, 20(r1) + stw r6, 24(r1) + stw r7, 28(r1) + stw r8, 32(r1) + stw r9, 36(r1) + stw r10,40(r1) + addi r5, r1, 48 - /* load r4 with local address */ - lwz r4, 44(r1) + mfctr r4 /* ftrace_caller has moved local addr here */ + stw r4, 44(r1) + mflr r3 /* ftrace_caller has restored LR from stack */ subi r4, r4, MCOUNT_INSN_SIZE - /* Grab the LR out of the caller stack frame */ - lwz r3,52(r1) - bl prepare_ftrace_return nop @@ -66,9 +146,21 @@ _GLOBAL(ftrace_graph_caller) * Change the LR in the callers stack frame to this. */ stw r3,52(r1) + mtlr r3 + lwz r0,44(r1) + mtctr r0 + + lwz r3, 12(r1) + lwz r4, 16(r1) + lwz r5, 20(r1) + lwz r6, 24(r1) + lwz r7, 28(r1) + lwz r8, 32(r1) + lwz r9, 36(r1) + lwz r10,40(r1) + + addi r1, r1, 48 - MCOUNT_RESTORE_FRAME - /* old link register ends up in ctr reg */ bctr _GLOBAL(return_to_handler) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index f9fd5f743eba..d636fc755f60 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -41,15 +41,14 @@ _GLOBAL(ftrace_regs_caller) /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) - SAVE_10GPRS(2, r1) + SAVE_GPRS(2, 11, r1) /* Ok to continue? */ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 beq ftrace_no_trace - SAVE_10GPRS(12, r1) - SAVE_10GPRS(22, r1) + SAVE_GPRS(12, 31, r1) /* Save previous stack pointer (r1) */ addi r8, r1, SWITCH_FRAME_SIZE @@ -108,10 +107,8 @@ ftrace_regs_call: #endif /* Restore gprs */ - REST_GPR(0,r1) - REST_10GPRS(2,r1) - REST_10GPRS(12,r1) - REST_10GPRS(22,r1) + REST_GPR(0, r1) + REST_GPRS(2, 31, r1) /* Restore possibly modified LR */ ld r0, _LINK(r1) @@ -157,7 +154,7 @@ _GLOBAL(ftrace_caller) stdu r1, -SWITCH_FRAME_SIZE(r1) /* Save all gprs to pt_regs */ - SAVE_8GPRS(3, r1) + SAVE_GPRS(3, 10, r1) lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 @@ -194,7 +191,7 @@ ftrace_call: mtctr r3 /* Restore gprs */ - REST_8GPRS(3,r1) + REST_GPRS(3, 10, r1) /* Restore callee's TOC */ ld r2, 24(r1) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 11741703d26e..a08bb7cefdc5 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -245,7 +245,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, if (panic_on_oops) panic("Fatal exception"); - do_exit(signr); + make_task_dead(signr); } NOKPROBE_SYMBOL(oops_end); @@ -792,9 +792,9 @@ int machine_check_generic(struct pt_regs *regs) void die_mce(const char *str, struct pt_regs *regs, long err) { /* - * The machine check wants to kill the interrupted context, but - * do_exit() checks for in_interrupt() and panics in that case, so - * exit the irq/nmi before calling die. + * The machine check wants to kill the interrupted context, + * but make_task_dead() checks for in_interrupt() and panics + * in that case, so exit the irq/nmi before calling die. */ if (in_nmi()) nmi_exit(); diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 8513aa49614e..d3942de254c6 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -84,7 +84,7 @@ static int udbg_uart_getc(void) return udbg_uart_in(UART_RBR); } -static void udbg_use_uart(void) +static void __init udbg_use_uart(void) { udbg_putc = udbg_uart_putc; udbg_flush = udbg_uart_flush; @@ -92,7 +92,7 @@ static void udbg_use_uart(void) udbg_getc_poll = udbg_uart_getc_poll; } -void udbg_uart_setup(unsigned int speed, unsigned int clock) +void __init udbg_uart_setup(unsigned int speed, unsigned int clock) { unsigned int dll, base_bauds; @@ -121,7 +121,7 @@ void udbg_uart_setup(unsigned int speed, unsigned int clock) udbg_uart_out(UART_FCR, 0x7); } -unsigned int udbg_probe_uart_speed(unsigned int clock) +unsigned int __init udbg_probe_uart_speed(unsigned int clock) { unsigned int dll, dlm, divisor, prescaler, speed; u8 old_lcr; @@ -172,7 +172,7 @@ static void udbg_uart_out_pio(unsigned int reg, u8 data) outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride)); } -void udbg_uart_init_pio(unsigned long port, unsigned int stride) +void __init udbg_uart_init_pio(unsigned long port, unsigned int stride) { if (!port) return; @@ -194,7 +194,7 @@ static void udbg_uart_out_mmio(unsigned int reg, u8 data) } -void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) +void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) { if (!addr) return; diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index ae632569446f..fd9432875ebc 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -261,7 +261,7 @@ static unsigned int rfin(unsigned int x) int emulate_altivec(struct pt_regs *regs) { - struct ppc_inst instr; + ppc_inst_t instr; unsigned int i, word; unsigned int va, vb, vc, vd; vector128 *vrs; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index ba03eedfdcd8..5cc24d8cce94 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -47,6 +47,10 @@ EXPORT_SYMBOL(store_vr_state) */ _GLOBAL(load_up_altivec) mfmsr r5 /* grab the current MSR */ +#ifdef CONFIG_PPC_BOOK3S_64 + /* interrupt doesn't set MSR[RI] and HPT can fault on current access */ + ori r5,r5,MSR_RI +#endif oris r5,r5,MSR_VEC@h MTMSRD(r5) /* enable use of AltiVec now */ isync @@ -126,6 +130,12 @@ _GLOBAL(load_up_vsx) andis. r5,r12,MSR_VEC@h beql+ load_up_altivec /* skip if already loaded */ +#ifdef CONFIG_PPC_BOOK3S_64 + /* interrupt doesn't set MSR[RI] and HPT can fault on current access */ + li r5,MSR_RI + mtmsrd r5,1 +#endif + ld r4,PACACURRENT(r13) addi r4,r4,THREAD /* Get THREAD */ li r6,1 diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 18e42c74abdd..2bcca818136a 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -322,10 +322,6 @@ SECTIONS #ifdef CONFIG_PPC32 .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA -#ifdef CONFIG_UBSAN - *(.data..Lubsan_data*) - *(.data..Lubsan_type*) -#endif *(.data.rel*) *(SDATA_MAIN) *(.sdata2) @@ -336,24 +332,18 @@ SECTIONS #else .data : AT(ADDR(.data) - LOAD_OFFSET) { DATA_DATA -#ifdef CONFIG_UBSAN - *(.data..Lubsan_data*) - *(.data..Lubsan_type*) -#endif *(.data.rel*) *(.toc1) *(.branch_lt) } - . = ALIGN(256); - .got : AT(ADDR(.got) - LOAD_OFFSET) { - __toc_start = .; + .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) { + *(.got) #ifndef CONFIG_RELOCATABLE __prom_init_toc_start = .; - arch/powerpc/kernel/prom_init.o*(.toc .got) + arch/powerpc/kernel/prom_init.o*(.toc) __prom_init_toc_end = .; #endif - *(.got) *(.toc) } #endif diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index f9ea0e5357f9..bfc27496fe7e 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -85,10 +85,37 @@ static DEFINE_PER_CPU(u64, wd_timer_tb); /* SMP checker bits */ static unsigned long __wd_smp_lock; +static unsigned long __wd_reporting; +static unsigned long __wd_nmi_output; static cpumask_t wd_smp_cpus_pending; static cpumask_t wd_smp_cpus_stuck; static u64 wd_smp_last_reset_tb; +/* + * Try to take the exclusive watchdog action / NMI IPI / printing lock. + * wd_smp_lock must be held. If this fails, we should return and wait + * for the watchdog to kick in again (or another CPU to trigger it). + * + * Importantly, if hardlockup_panic is set, wd_try_report failure should + * not delay the panic, because whichever other CPU is reporting will + * call panic. + */ +static bool wd_try_report(void) +{ + if (__wd_reporting) + return false; + __wd_reporting = 1; + return true; +} + +/* End printing after successful wd_try_report. wd_smp_lock not required. */ +static void wd_end_reporting(void) +{ + smp_mb(); /* End printing "critical section" */ + WARN_ON_ONCE(__wd_reporting == 0); + WRITE_ONCE(__wd_reporting, 0); +} + static inline void wd_smp_lock(unsigned long *flags) { /* @@ -128,103 +155,182 @@ static void wd_lockup_ipi(struct pt_regs *regs) else dump_stack(); + /* + * __wd_nmi_output must be set after we printk from NMI context. + * + * printk from NMI context defers printing to the console to irq_work. + * If that NMI was taken in some code that is hard-locked, then irqs + * are disabled so irq_work will never fire. That can result in the + * hard lockup messages being delayed (indefinitely, until something + * else kicks the console drivers). + * + * Setting __wd_nmi_output will cause another CPU to notice and kick + * the console drivers for us. + * + * xchg is not needed here (it could be a smp_mb and store), but xchg + * gives the memory ordering and atomicity required. + */ + xchg(&__wd_nmi_output, 1); + /* Do not panic from here because that can recurse into NMI IPI layer */ } -static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) +static bool set_cpu_stuck(int cpu) { - cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); - cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); + cpumask_set_cpu(cpu, &wd_smp_cpus_stuck); + cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + /* + * See wd_smp_clear_cpu_pending() + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { - wd_smp_last_reset_tb = tb; + wd_smp_last_reset_tb = get_tb(); cpumask_andnot(&wd_smp_cpus_pending, &wd_cpus_enabled, &wd_smp_cpus_stuck); + return true; } -} -static void set_cpu_stuck(int cpu, u64 tb) -{ - set_cpumask_stuck(cpumask_of(cpu), tb); + return false; } -static void watchdog_smp_panic(int cpu, u64 tb) +static void watchdog_smp_panic(int cpu) { + static cpumask_t wd_smp_cpus_ipi; // protected by reporting unsigned long flags; + u64 tb, last_reset; int c; wd_smp_lock(&flags); /* Double check some things under lock */ - if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb) + tb = get_tb(); + last_reset = wd_smp_last_reset_tb; + if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb) goto out; if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) goto out; - if (cpumask_weight(&wd_smp_cpus_pending) == 0) + if (!wd_try_report()) + goto out; + for_each_online_cpu(c) { + if (!cpumask_test_cpu(c, &wd_smp_cpus_pending)) + continue; + if (c == cpu) + continue; // should not happen + + __cpumask_set_cpu(c, &wd_smp_cpus_ipi); + if (set_cpu_stuck(c)) + break; + } + if (cpumask_empty(&wd_smp_cpus_ipi)) { + wd_end_reporting(); goto out; + } + wd_smp_unlock(&flags); pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n", - cpu, cpumask_pr_args(&wd_smp_cpus_pending)); + cpu, cpumask_pr_args(&wd_smp_cpus_ipi)); pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n", - cpu, tb, wd_smp_last_reset_tb, - tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000); + cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000); if (!sysctl_hardlockup_all_cpu_backtrace) { /* * Try to trigger the stuck CPUs, unless we are going to * get a backtrace on all of them anyway. */ - for_each_cpu(c, &wd_smp_cpus_pending) { - if (c == cpu) - continue; + for_each_cpu(c, &wd_smp_cpus_ipi) { smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000); + __cpumask_clear_cpu(c, &wd_smp_cpus_ipi); } - } - - /* Take the stuck CPUs out of the watch group */ - set_cpumask_stuck(&wd_smp_cpus_pending, tb); - - wd_smp_unlock(&flags); - - if (sysctl_hardlockup_all_cpu_backtrace) + } else { trigger_allbutself_cpu_backtrace(); + cpumask_clear(&wd_smp_cpus_ipi); + } if (hardlockup_panic) nmi_panic(NULL, "Hard LOCKUP"); + wd_end_reporting(); + return; out: wd_smp_unlock(&flags); } -static void wd_smp_clear_cpu_pending(int cpu, u64 tb) +static void wd_smp_clear_cpu_pending(int cpu) { if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) { if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) { struct pt_regs *regs = get_irq_regs(); unsigned long flags; - wd_smp_lock(&flags); - pr_emerg("CPU %d became unstuck TB:%lld\n", - cpu, tb); + cpu, get_tb()); print_irqtrace_events(current); if (regs) show_regs(regs); else dump_stack(); + wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); wd_smp_unlock(&flags); + } else { + /* + * The last CPU to clear pending should have reset the + * watchdog so we generally should not find it empty + * here if our CPU was clear. However it could happen + * due to a rare race with another CPU taking the + * last CPU out of the mask concurrently. + * + * We can't add a warning for it. But just in case + * there is a problem with the watchdog that is causing + * the mask to not be reset, try to kick it along here. + */ + if (unlikely(cpumask_empty(&wd_smp_cpus_pending))) + goto none_pending; } return; } + + /* + * All other updates to wd_smp_cpus_pending are performed under + * wd_smp_lock. All of them are atomic except the case where the + * mask becomes empty and is reset. This will not happen here because + * cpu was tested to be in the bitmap (above), and a CPU only clears + * its own bit. _Except_ in the case where another CPU has detected a + * hard lockup on our CPU and takes us out of the pending mask. So in + * normal operation there will be no race here, no problem. + * + * In the lockup case, this atomic clear-bit vs a store that refills + * other bits in the accessed word wll not be a problem. The bit clear + * is atomic so it will not cause the store to get lost, and the store + * will never set this bit so it will not overwrite the bit clear. The + * only way for a stuck CPU to return to the pending bitmap is to + * become unstuck itself. + */ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + + /* + * Order the store to clear pending with the load(s) to check all + * words in the pending mask to check they are all empty. This orders + * with the same barrier on another CPU. This prevents two CPUs + * clearing the last 2 pending bits, but neither seeing the other's + * store when checking if the mask is empty, and missing an empty + * mask, which ends with a false positive. + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { unsigned long flags; +none_pending: + /* + * Double check under lock because more than one CPU could see + * a clear mask with the lockless check after clearing their + * pending bits. + */ wd_smp_lock(&flags); if (cpumask_empty(&wd_smp_cpus_pending)) { - wd_smp_last_reset_tb = tb; + wd_smp_last_reset_tb = get_tb(); cpumask_andnot(&wd_smp_cpus_pending, &wd_cpus_enabled, &wd_smp_cpus_stuck); @@ -239,10 +345,21 @@ static void watchdog_timer_interrupt(int cpu) per_cpu(wd_timer_tb, cpu) = tb; - wd_smp_clear_cpu_pending(cpu, tb); + wd_smp_clear_cpu_pending(cpu); if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb) - watchdog_smp_panic(cpu, tb); + watchdog_smp_panic(cpu); + + if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) { + /* + * Something has called printk from NMI context. It might be + * stuck, so this this triggers a flush that will get that + * printk output to the console. + * + * See wd_lockup_ipi. + */ + printk_trigger_flush(); + } } DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) @@ -261,12 +378,27 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) { + /* + * Taking wd_smp_lock here means it is a soft-NMI lock, which + * means we can't take any regular or irqsafe spin locks while + * holding this lock. This is why timers can't printk while + * holding the lock. + */ wd_smp_lock(&flags); if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) { wd_smp_unlock(&flags); return 0; } - set_cpu_stuck(cpu, tb); + if (!wd_try_report()) { + wd_smp_unlock(&flags); + /* Couldn't report, try again in 100ms */ + mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000); + return 0; + } + + set_cpu_stuck(cpu); + + wd_smp_unlock(&flags); pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n", cpu, (void *)regs->nip); @@ -277,14 +409,21 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) print_irqtrace_events(current); show_regs(regs); - wd_smp_unlock(&flags); + xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi if (sysctl_hardlockup_all_cpu_backtrace) trigger_allbutself_cpu_backtrace(); if (hardlockup_panic) nmi_panic(regs, "Hard LOCKUP"); + + wd_end_reporting(); } + /* + * We are okay to change DEC in soft_nmi_interrupt because the masked + * handler has marked a DEC as pending, so the timer interrupt will be + * replayed as soon as local irqs are enabled again. + */ if (wd_panic_timeout_tb < 0x7fffffff) mtspr(SPRN_DEC, wd_panic_timeout_tb); @@ -312,11 +451,15 @@ void arch_touch_nmi_watchdog(void) { unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; int cpu = smp_processor_id(); - u64 tb = get_tb(); + u64 tb; + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return; + + tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) { per_cpu(wd_timer_tb, cpu) = tb; - wd_smp_clear_cpu_pending(cpu, tb); + wd_smp_clear_cpu_pending(cpu); } } EXPORT_SYMBOL(arch_touch_nmi_watchdog); @@ -374,7 +517,7 @@ static void stop_watchdog(void *arg) cpumask_clear_cpu(cpu, &wd_cpus_enabled); wd_smp_unlock(&flags); - wd_smp_clear_cpu_pending(cpu, get_tb()); + wd_smp_clear_cpu_pending(cpu); } static int stop_watchdog_on_cpu(unsigned int cpu) |