Diffstat (limited to 'arch/powerpc/kernel')
72 files changed, 1165 insertions, 682 deletions
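One of the larger restructures below is in eeh_driver.c, where eeh_handle_normal_event() drops its repeated "if (result != PCI_ERS_RESULT_DISCONNECT)" guards in favour of a single recover_failed: label. The following is a minimal, self-contained C sketch of that error-handling pattern only; the step names, return values and messages are invented for illustration and are not the kernel code:

/*
 * Sketch: consolidate a chain of "skip if already failed" checks into one
 * failure label, as the eeh_handle_normal_event() rework in this diff does.
 * All identifiers here are made up for the demo.
 */
#include <stdio.h>

enum result { RES_RECOVERED, RES_NEED_RESET, RES_DISCONNECT };

static enum result step_notify_drivers(void) { return RES_NEED_RESET; }
static enum result step_reset_device(void)   { return RES_RECOVERED; }

static void handle_event(void)
{
	enum result r;

	r = step_notify_drivers();
	if (r == RES_DISCONNECT)
		goto recover_failed;	/* bail out once, not at every later step */

	r = step_reset_device();
	if (r == RES_DISCONNECT)
		goto recover_failed;

	printf("recovery successful\n");
	return;

recover_failed:
	printf("permanent failure, disabling device\n");
}

int main(void)
{
	handle_event();
	return 0;
}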
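Further down, the interrupt.c hunks replace open-coded reads of current_thread_info()->flags (previously via READ_ONCE()) with the read_thread_flags() helper, so the flags word is loaded in one well-defined place. Below is a rough user-space analogue of that accessor pattern; all names and bit values are invented for the demo, and C11 atomics stand in for the kernel's READ_ONCE():

/*
 * Sketch: wrap repeated reads of a shared flags word behind one accessor,
 * analogous to read_thread_flags(). Demo values, not the kernel's bit
 * assignments.
 */
#include <stdatomic.h>
#include <stdio.h>

#define TIF_NEED_RESCHED (1UL << 0)	/* demo values only */
#define TIF_SIGPENDING   (1UL << 2)

/* stand-in for the per-task flags word */
static atomic_ulong thread_flags;

static unsigned long read_thread_flags_demo(void)
{
	/* one explicit place where the shared flags word is loaded */
	return atomic_load_explicit(&thread_flags, memory_order_relaxed);
}

int main(void)
{
	unsigned long ti_flags;

	atomic_store(&thread_flags, TIF_NEED_RESCHED | TIF_SIGPENDING);

	ti_flags = read_thread_flags_demo();
	if (ti_flags & TIF_NEED_RESCHED)
		printf("would reschedule\n");
	if (ti_flags & TIF_SIGPENDING)
		printf("would deliver pending signals\n");
	return 0;
}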
| diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b039877c743d..4d7829399570 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -11,6 +11,7 @@ CFLAGS_prom_init.o      += -fPIC  CFLAGS_btext.o		+= -fPIC  endif +CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)  CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)  CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)  CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index bf96b954a4eb..3e37ece06739 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -105,7 +105,7 @@ static struct aligninfo spe_aligninfo[32] = {   * so we don't need the address swizzling.   */  static int emulate_spe(struct pt_regs *regs, unsigned int reg, -		       struct ppc_inst ppc_instr) +		       ppc_inst_t ppc_instr)  {  	union {  		u64 ll; @@ -300,7 +300,7 @@ Efault_write:  int fix_alignment(struct pt_regs *regs)  { -	struct ppc_inst instr; +	ppc_inst_t instr;  	struct instruction_op op;  	int r, type; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index cc05522f50bf..7582f3e3a330 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -54,7 +54,7 @@  #endif  #ifdef CONFIG_PPC32 -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x  #include "head_booke.h"  #endif  #endif @@ -139,6 +139,7 @@ int main(void)  	OFFSET(THR11, thread_struct, r11);  	OFFSET(THLR, thread_struct, lr);  	OFFSET(THCTR, thread_struct, ctr); +	OFFSET(THSR0, thread_struct, sr0);  #endif  #ifdef CONFIG_SPE  	OFFSET(THREAD_EVR0, thread_struct, evr[0]); @@ -218,10 +219,12 @@ int main(void)  	OFFSET(PACA_EXGEN, paca_struct, exgen);  	OFFSET(PACA_EXMC, paca_struct, exmc);  	OFFSET(PACA_EXNMI, paca_struct, exnmi); +#ifdef CONFIG_PPC_64S_HASH_MMU  	OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);  	OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);  	OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);  	OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area); +#endif  	OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE  	OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use); diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 803c2a45b22a..9d9d56b574cc 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -161,7 +161,7 @@ void btext_map(void)  	boot_text_mapped = 1;  } -static int btext_initialize(struct device_node *np) +static int __init btext_initialize(struct device_node *np)  {  	unsigned int width, height, depth, pitch;  	unsigned long address = 0; @@ -241,8 +241,10 @@ int __init btext_find_display(int allow_nonstdout)  			rc = btext_initialize(np);  			printk("result: %d\n", rc);  		} -		if (rc == 0) +		if (rc == 0) { +			of_node_put(np);  			break; +		}  	}  	return rc;  } @@ -290,7 +292,7 @@ void btext_update_display(unsigned long phys, int width, int height,  }  EXPORT_SYMBOL(btext_update_display); -void btext_clearscreen(void) +void __init btext_clearscreen(void)  {  	unsigned int *base	= (unsigned int *)calc_base(0, 0);  	unsigned long width 	= ((dispDeviceRect[2] - dispDeviceRect[0]) * @@ -308,7 +310,7 @@ void btext_clearscreen(void)  	rmci_maybe_off();  } -void btext_flushscreen(void) +void __init btext_flushscreen(void)  {  	unsigned int *base	= (unsigned int *)calc_base(0, 0);  	unsigned long width 	= 
((dispDeviceRect[2] - dispDeviceRect[0]) * @@ -327,7 +329,7 @@ void btext_flushscreen(void)  	__asm__ __volatile__ ("sync" ::: "memory");  } -void btext_flushline(void) +void __init btext_flushline(void)  {  	unsigned int *base	= (unsigned int *)calc_base(0, g_loc_Y << 4);  	unsigned long width 	= ((dispDeviceRect[2] - dispDeviceRect[0]) * @@ -542,7 +544,7 @@ void btext_drawstring(const char *c)  		btext_drawchar(*c++);  } -void btext_drawtext(const char *c, unsigned int len) +void __init btext_drawtext(const char *c, unsigned int len)  {  	if (!boot_text_mapped)  		return; @@ -550,7 +552,7 @@ void btext_drawtext(const char *c, unsigned int len)  		btext_drawchar(*c++);  } -void btext_drawhex(unsigned long v) +void __init btext_drawhex(unsigned long v)  {  	if (!boot_text_mapped)  		return; diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index cf1be75b7833..00b0992be3e7 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -710,7 +710,7 @@ static struct kobj_attribute cache_shared_cpu_list_attr =  	__ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);  /* Attributes which should always be created -- the kobject/sysfs core - * does this automatically via kobj_type->default_attrs.  This is the + * does this automatically via kobj_type->default_groups.  This is the   * minimum data required to uniquely identify a cache.   */  static struct attribute *cache_index_default_attrs[] = { @@ -720,6 +720,7 @@ static struct attribute *cache_index_default_attrs[] = {  	&cache_shared_cpu_list_attr.attr,  	NULL,  }; +ATTRIBUTE_GROUPS(cache_index_default);  /* Attributes which should be created if the cache device node has the   * right properties -- see cacheinfo_create_index_opt_attrs @@ -738,7 +739,7 @@ static const struct sysfs_ops cache_index_ops = {  static struct kobj_type cache_index_type = {  	.release = cache_index_release,  	.sysfs_ops = &cache_index_ops, -	.default_attrs = cache_index_default_attrs, +	.default_groups = cache_index_default_groups,  };  static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir) diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c index 3cca88ee96d7..3dc61e203f37 100644 --- a/arch/powerpc/kernel/cpu_setup_power.c +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -109,7 +109,7 @@ static void init_PMU_HV_ISA207(void)  static void init_PMU(void)  {  	mtspr(SPRN_MMCRA, 0); -	mtspr(SPRN_MMCR0, 0); +	mtspr(SPRN_MMCR0, MMCR0_FC);  	mtspr(SPRN_MMCR1, 0);  	mtspr(SPRN_MMCR2, 0);  } @@ -123,7 +123,7 @@ static void init_PMU_ISA31(void)  {  	mtspr(SPRN_MMCR3, 0);  	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); -	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);  }  /* @@ -137,6 +137,7 @@ void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t)  		return;  	mtspr(SPRN_LPID, 0); +	mtspr(SPRN_AMOR, ~0);  	mtspr(SPRN_PCR, PCR_MASK);  	init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);  } @@ -150,6 +151,7 @@ void __restore_cpu_power7(void)  		return;  	mtspr(SPRN_LPID, 0); +	mtspr(SPRN_AMOR, ~0);  	mtspr(SPRN_PCR, PCR_MASK);  	init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);  } @@ -164,6 +166,7 @@ void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t)  		return;  	mtspr(SPRN_LPID, 0); +	mtspr(SPRN_AMOR, ~0);  	mtspr(SPRN_PCR, PCR_MASK);  	init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */  	init_HFSCR(); @@ -184,6 +187,7 @@ void __restore_cpu_power8(void)  		return;  	
mtspr(SPRN_LPID, 0); +	mtspr(SPRN_AMOR, ~0);  	mtspr(SPRN_PCR, PCR_MASK);  	init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */  	init_HFSCR(); @@ -202,6 +206,7 @@ void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t)  	mtspr(SPRN_PSSCR, 0);  	mtspr(SPRN_LPID, 0);  	mtspr(SPRN_PID, 0); +	mtspr(SPRN_AMOR, ~0);  	mtspr(SPRN_PCR, PCR_MASK);  	init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\  			 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); @@ -223,6 +228,7 @@ void __restore_cpu_power9(void)  	mtspr(SPRN_PSSCR, 0);  	mtspr(SPRN_LPID, 0);  	mtspr(SPRN_PID, 0); +	mtspr(SPRN_AMOR, ~0);  	mtspr(SPRN_PCR, PCR_MASK);  	init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\  			 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); @@ -242,6 +248,7 @@ void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t)  	mtspr(SPRN_PSSCR, 0);  	mtspr(SPRN_LPID, 0);  	mtspr(SPRN_PID, 0); +	mtspr(SPRN_AMOR, ~0);  	mtspr(SPRN_PCR, PCR_MASK);  	init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\  			 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); @@ -264,6 +271,7 @@ void __restore_cpu_power10(void)  	mtspr(SPRN_PSSCR, 0);  	mtspr(SPRN_LPID, 0);  	mtspr(SPRN_PID, 0); +	mtspr(SPRN_AMOR, ~0);  	mtspr(SPRN_PCR, PCR_MASK);  	init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\  			 LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 5545c9cd17c1..f55c6fb34a3a 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -27,7 +27,8 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)  	ppc_msgsync(); -	may_hard_irq_enable(); +	if (should_hard_irq_enable()) +		do_hard_irq_enable();  	kvmppc_clear_host_ipi(smp_processor_id());  	__this_cpu_inc(irq_stat.doorbell_irqs); diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index ba527fb52993..7d1b2c4a4891 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -80,6 +80,7 @@ static void __restore_cpu_cpufeatures(void)  	mtspr(SPRN_LPCR, system_registers.lpcr);  	if (hv_mode) {  		mtspr(SPRN_LPID, 0); +		mtspr(SPRN_AMOR, ~0);  		mtspr(SPRN_HFSCR, system_registers.hfscr);  		mtspr(SPRN_PCR, system_registers.pcr);  	} @@ -216,6 +217,7 @@ static int __init feat_enable_hv(struct dt_cpu_feature *f)  	}  	mtspr(SPRN_LPID, 0); +	mtspr(SPRN_AMOR, ~0);  	lpcr = mfspr(SPRN_LPCR);  	lpcr &=  ~LPCR_LPES0; /* HV external interrupts */ @@ -271,6 +273,9 @@ static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)  {  	u64 lpcr; +	if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) +		return 0; +  	lpcr = mfspr(SPRN_LPCR);  	lpcr &= ~LPCR_ISL; @@ -290,6 +295,9 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)  {  	u64 lpcr; +	if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) +		return 0; +  	lpcr = mfspr(SPRN_LPCR);  	lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);  	mtspr(SPRN_LPCR, lpcr); @@ -303,15 +311,15 @@ static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)  static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)  { -#ifdef CONFIG_PPC_RADIX_MMU +	if (!IS_ENABLED(CONFIG_PPC_RADIX_MMU)) +		return 0; + +	cur_cpu_spec->mmu_features |= MMU_FTR_KERNEL_RO;  	cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; -	cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;  	cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;  	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;  	return 1; -#endif -	
return 0;  }  static int __init feat_enable_dscr(struct dt_cpu_feature *f) @@ -336,7 +344,7 @@ static int __init feat_enable_dscr(struct dt_cpu_feature *f)  	return 1;  } -static void hfscr_pmu_enable(void) +static void __init hfscr_pmu_enable(void)  {  	u64 hfscr = mfspr(SPRN_HFSCR);  	hfscr |= PPC_BIT(60); @@ -351,7 +359,7 @@ static void init_pmu_power8(void)  	}  	mtspr(SPRN_MMCRA, 0); -	mtspr(SPRN_MMCR0, 0); +	mtspr(SPRN_MMCR0, MMCR0_FC);  	mtspr(SPRN_MMCR1, 0);  	mtspr(SPRN_MMCR2, 0);  	mtspr(SPRN_MMCRS, 0); @@ -390,7 +398,7 @@ static void init_pmu_power9(void)  		mtspr(SPRN_MMCRC, 0);  	mtspr(SPRN_MMCRA, 0); -	mtspr(SPRN_MMCR0, 0); +	mtspr(SPRN_MMCR0, MMCR0_FC);  	mtspr(SPRN_MMCR1, 0);  	mtspr(SPRN_MMCR2, 0);  } @@ -426,7 +434,7 @@ static void init_pmu_power10(void)  	mtspr(SPRN_MMCR3, 0);  	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); -	mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); +	mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);  }  static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index 9bdaaf7fddc9..2f9dbf8ad2ee 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -280,7 +280,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)  }  DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache); -void eeh_cache_debugfs_init(void) +void __init eeh_cache_debugfs_init(void)  {  	debugfs_create_file_unsafe("eeh_address_cache", 0400,  			arch_debugfs_dir, NULL, diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 350dab18e137..422f80b5b27b 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -905,18 +905,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe)  	}  #endif /* CONFIG_STACKTRACE */ +	eeh_for_each_pe(pe, tmp_pe) +		eeh_pe_for_each_dev(tmp_pe, edev, tmp) +			edev->mode &= ~EEH_DEV_NO_HANDLER; +  	eeh_pe_update_time_stamp(pe);  	pe->freeze_count++;  	if (pe->freeze_count > eeh_max_freezes) {  		pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",  		       pe->phb->global_number, pe->addr,  		       pe->freeze_count); -		result = PCI_ERS_RESULT_DISCONNECT; -	} -	eeh_for_each_pe(pe, tmp_pe) -		eeh_pe_for_each_dev(tmp_pe, edev, tmp) -			edev->mode &= ~EEH_DEV_NO_HANDLER; +		goto recover_failed; +	}  	/* Walk the various device drivers attached to this slot through  	 * a reset sequence, giving each an opportunity to do what it needs @@ -928,39 +929,38 @@ void eeh_handle_normal_event(struct eeh_pe *pe)  	 * the error. Override the result if necessary to have partially  	 * hotplug for this case.  	 
*/ -	if (result != PCI_ERS_RESULT_DISCONNECT) { -		pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", -			pe->freeze_count, eeh_max_freezes); -		pr_info("EEH: Notify device drivers to shutdown\n"); -		eeh_set_channel_state(pe, pci_channel_io_frozen); -		eeh_set_irq_state(pe, false); -		eeh_pe_report("error_detected(IO frozen)", pe, -			      eeh_report_error, &result); -		if ((pe->type & EEH_PE_PHB) && -		    result != PCI_ERS_RESULT_NONE && -		    result != PCI_ERS_RESULT_NEED_RESET) -			result = PCI_ERS_RESULT_NEED_RESET; -	} +	pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n", +		pe->freeze_count, eeh_max_freezes); +	pr_info("EEH: Notify device drivers to shutdown\n"); +	eeh_set_channel_state(pe, pci_channel_io_frozen); +	eeh_set_irq_state(pe, false); +	eeh_pe_report("error_detected(IO frozen)", pe, +		      eeh_report_error, &result); +	if (result == PCI_ERS_RESULT_DISCONNECT) +		goto recover_failed; + +	/* +	 * Error logged on a PHB are always fences which need a full +	 * PHB reset to clear so force that to happen. +	 */ +	if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE) +		result = PCI_ERS_RESULT_NEED_RESET;  	/* Get the current PCI slot state. This can take a long time,  	 * sometimes over 300 seconds for certain systems.  	 */ -	if (result != PCI_ERS_RESULT_DISCONNECT) { -		rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); -		if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { -			pr_warn("EEH: Permanent failure\n"); -			result = PCI_ERS_RESULT_DISCONNECT; -		} +	rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000); +	if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { +		pr_warn("EEH: Permanent failure\n"); +		goto recover_failed;  	}  	/* Since rtas may enable MMIO when posting the error log,  	 * don't post the error log until after all dev drivers  	 * have been informed.  	 
*/ -	if (result != PCI_ERS_RESULT_DISCONNECT) { -		pr_info("EEH: Collect temporary log\n"); -		eeh_slot_error_detail(pe, EEH_LOG_TEMP); -	} +	pr_info("EEH: Collect temporary log\n"); +	eeh_slot_error_detail(pe, EEH_LOG_TEMP);  	/* If all device drivers were EEH-unaware, then shut  	 * down all of the device drivers, and hope they @@ -970,9 +970,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)  		pr_info("EEH: Reset with hotplug activity\n");  		rc = eeh_reset_device(pe, bus, NULL, false);  		if (rc) { -			pr_warn("%s: Unable to reset, err=%d\n", -				__func__, rc); -			result = PCI_ERS_RESULT_DISCONNECT; +			pr_warn("%s: Unable to reset, err=%d\n", __func__, rc); +			goto recover_failed;  		}  	} @@ -980,10 +979,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)  	if (result == PCI_ERS_RESULT_CAN_RECOVER) {  		pr_info("EEH: Enable I/O for affected devices\n");  		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); +		if (rc < 0) +			goto recover_failed; -		if (rc < 0) { -			result = PCI_ERS_RESULT_DISCONNECT; -		} else if (rc) { +		if (rc) {  			result = PCI_ERS_RESULT_NEED_RESET;  		} else {  			pr_info("EEH: Notify device drivers to resume I/O\n"); @@ -991,15 +990,13 @@ void eeh_handle_normal_event(struct eeh_pe *pe)  				      eeh_report_mmio_enabled, &result);  		}  	} - -	/* If all devices reported they can proceed, then re-enable DMA */  	if (result == PCI_ERS_RESULT_CAN_RECOVER) {  		pr_info("EEH: Enabled DMA for affected devices\n");  		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); +		if (rc < 0) +			goto recover_failed; -		if (rc < 0) { -			result = PCI_ERS_RESULT_DISCONNECT; -		} else if (rc) { +		if (rc) {  			result = PCI_ERS_RESULT_NEED_RESET;  		} else {  			/* @@ -1017,16 +1014,15 @@ void eeh_handle_normal_event(struct eeh_pe *pe)  		pr_info("EEH: Reset without hotplug activity\n");  		rc = eeh_reset_device(pe, bus, &rmv_data, true);  		if (rc) { -			pr_warn("%s: Cannot reset, err=%d\n", -				__func__, rc); -			result = PCI_ERS_RESULT_DISCONNECT; -		} else { -			result = PCI_ERS_RESULT_NONE; -			eeh_set_channel_state(pe, pci_channel_io_normal); -			eeh_set_irq_state(pe, true); -			eeh_pe_report("slot_reset", pe, eeh_report_reset, -				      &result); +			pr_warn("%s: Cannot reset, err=%d\n", __func__, rc); +			goto recover_failed;  		} + +		result = PCI_ERS_RESULT_NONE; +		eeh_set_channel_state(pe, pci_channel_io_normal); +		eeh_set_irq_state(pe, true); +		eeh_pe_report("slot_reset", pe, eeh_report_reset, +			      &result);  	}  	if ((result == PCI_ERS_RESULT_RECOVERED) || @@ -1054,45 +1050,47 @@ void eeh_handle_normal_event(struct eeh_pe *pe)  		}  		pr_info("EEH: Recovery successful.\n"); -	} else  { -		/* -		 * About 90% of all real-life EEH failures in the field -		 * are due to poorly seated PCI cards. Only 10% or so are -		 * due to actual, failed cards. -		 */ -		pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" -		       "Please try reseating or replacing it\n", -			pe->phb->global_number, pe->addr); +		goto out; +	} -		eeh_slot_error_detail(pe, EEH_LOG_PERM); +recover_failed: +	/* +	 * About 90% of all real-life EEH failures in the field +	 * are due to poorly seated PCI cards. Only 10% or so are +	 * due to actual, failed cards. +	 */ +	pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n" +		"Please try reseating or replacing it\n", +		pe->phb->global_number, pe->addr); -		/* Notify all devices that they're about to go down. 
*/ -		eeh_set_channel_state(pe, pci_channel_io_perm_failure); -		eeh_set_irq_state(pe, false); -		eeh_pe_report("error_detected(permanent failure)", pe, -			      eeh_report_failure, NULL); +	eeh_slot_error_detail(pe, EEH_LOG_PERM); -		/* Mark the PE to be removed permanently */ -		eeh_pe_state_mark(pe, EEH_PE_REMOVED); +	/* Notify all devices that they're about to go down. */ +	eeh_set_channel_state(pe, pci_channel_io_perm_failure); +	eeh_set_irq_state(pe, false); +	eeh_pe_report("error_detected(permanent failure)", pe, +		      eeh_report_failure, NULL); -		/* -		 * Shut down the device drivers for good. We mark -		 * all removed devices correctly to avoid access -		 * the their PCI config any more. -		 */ -		if (pe->type & EEH_PE_VF) { -			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); -			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); -		} else { -			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); -			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); +	/* Mark the PE to be removed permanently */ +	eeh_pe_state_mark(pe, EEH_PE_REMOVED); -			pci_lock_rescan_remove(); -			pci_hp_remove_devices(bus); -			pci_unlock_rescan_remove(); -			/* The passed PE should no longer be used */ -			return; -		} +	/* +	 * Shut down the device drivers for good. We mark +	 * all removed devices correctly to avoid access +	 * the their PCI config any more. +	 */ +	if (pe->type & EEH_PE_VF) { +		eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL); +		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); +	} else { +		eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true); +		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED); + +		pci_lock_rescan_remove(); +		pci_hp_remove_devices(bus); +		pci_unlock_rescan_remove(); +		/* The passed PE should no longer be used */ +		return;  	}  out: diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 61fdd53cdd9a..7748c278d13c 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -73,13 +73,39 @@ prepare_transfer_to_handler:  _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)  #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_E500 */ +#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32) +	.globl	__kuep_lock +__kuep_lock: +	lwz	r9, THREAD+THSR0(r2) +	update_user_segments_by_4 r9, r10, r11, r12 +	blr + +__kuep_unlock: +	lwz	r9, THREAD+THSR0(r2) +	rlwinm  r9,r9,0,~SR_NX +	update_user_segments_by_4 r9, r10, r11, r12 +	blr + +.macro	kuep_lock +	bl	__kuep_lock +.endm +.macro	kuep_unlock +	bl	__kuep_unlock +.endm +#else +.macro	kuep_lock +.endm +.macro	kuep_unlock +.endm +#endif +  	.globl	transfer_to_syscall  transfer_to_syscall:  	stw	r11, GPR1(r1)  	stw	r11, 0(r1)  	mflr	r12  	stw	r12, _LINK(r1) -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x  	rlwinm	r9,r9,0,14,12		/* clear MSR_WE (necessary?) 
*/  #endif  	lis	r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ @@ -90,10 +116,10 @@ transfer_to_syscall:  	stw	r12,8(r1)  	stw	r2,_TRAP(r1)  	SAVE_GPR(0, r1) -	SAVE_4GPRS(3, r1) -	SAVE_2GPRS(7, r1) +	SAVE_GPRS(3, 8, r1)  	addi	r2,r10,-THREAD  	SAVE_NVGPRS(r1) +	kuep_lock  	/* Calling convention has r9 = orig r0, r10 = regs */  	addi	r10,r1,STACK_FRAME_OVERHEAD @@ -110,6 +136,7 @@ ret_from_syscall:  	cmplwi	cr0,r5,0  	bne-	2f  #endif /* CONFIG_PPC_47x */ +	kuep_unlock  	lwz	r4,_LINK(r1)  	lwz	r5,_CCR(r1)  	mtlr	r4 @@ -139,7 +166,7 @@ syscall_exit_finish:  	mtxer	r5  	lwz	r0,GPR0(r1)  	lwz	r3,GPR3(r1) -	REST_8GPRS(4,r1) +	REST_GPRS(4, 11, r1)  	lwz	r12,GPR12(r1)  	b	1b @@ -232,9 +259,9 @@ fast_exception_return:  	beq	3f			/* if not, we've got problems */  #endif -2:	REST_4GPRS(3, r11) +2:	REST_GPRS(3, 6, r11)  	lwz	r10,_CCR(r11) -	REST_2GPRS(1, r11) +	REST_GPRS(1, 2, r11)  	mtcr	r10  	lwz	r10,_LINK(r11)  	mtlr	r10 @@ -273,6 +300,7 @@ interrupt_return:  	beq	.Lkernel_interrupt_return  	bl	interrupt_exit_user_prepare  	cmpwi	r3,0 +	kuep_unlock  	bne-	.Lrestore_nvgprs  .Lfast_user_interrupt_return: @@ -298,16 +326,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)  	 * the reliable stack unwinder later on. Clear it.  	 */  	stw	r0,8(r1) -	REST_4GPRS(7, r1) -	REST_2GPRS(11, r1) +	REST_GPRS(7, 12, r1)  	mtcr	r3  	mtlr	r4  	mtctr	r5  	mtspr	SPRN_XER,r6 -	REST_4GPRS(2, r1) -	REST_GPR(6, r1) +	REST_GPRS(2, 6, r1)  	REST_GPR(0, r1)  	REST_GPR(1, r1)  	rfi @@ -341,8 +367,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)  	lwz	r6,_CCR(r1)  	li	r0,0 -	REST_4GPRS(7, r1) -	REST_2GPRS(11, r1) +	REST_GPRS(7, 12, r1)  	mtlr	r3  	mtctr	r4 @@ -354,7 +379,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)  	 */  	stw	r0,8(r1) -	REST_4GPRS(2, r1) +	REST_GPRS(2, 5, r1)  	bne-	cr1,1f /* emulate stack store */  	mtcr	r6 @@ -430,8 +455,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return)  	bne	interrupt_return;					\  	lwz	r0,GPR0(r1);						\  	lwz	r2,GPR2(r1);						\ -	REST_4GPRS(3, r1);						\ -	REST_2GPRS(7, r1);						\ +	REST_GPRS(3, 8, r1);						\  	lwz	r10,_XER(r1);						\  	lwz	r11,_CTR(r1);						\  	mtspr	SPRN_XER,r10;						\ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 70cff7b49e17..9581906b5ee9 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -180,7 +180,7 @@ _GLOBAL(_switch)  #endif  	ld	r8,KSP(r4)	/* new stack pointer */ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  BEGIN_MMU_FTR_SECTION  	b	2f  END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) @@ -232,7 +232,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)  	slbmte	r7,r0  	isync  2: -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_64S_HASH_MMU */  	clrrdi	r7, r8, THREAD_SHIFT	/* base of new stack */  	/* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index 93b0f3ec8fb0..d4b8aff20815 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -37,7 +37,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node,  		return -1;  	for (i = 0; i < (len / 4); i++) { -		struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i])); +		ppc_inst_t inst = ppc_inst(be32_to_cpu(insts[i]));  		patch_instruction(epapr_hypercall_start + i, inst);  #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)  		patch_instruction(epapr_ev_idle_start + i, inst); diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S index 711c66b76df1..67dc4e3179a0 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -198,8 +198,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)  	stdcx.	r0,0,r1		/* to clear the reservation */ -	REST_4GPRS(2, r1) -	REST_4GPRS(6, r1) +	REST_GPRS(2, 9, r1)  	ld	r10,_CTR(r1)  	ld	r11,_XER(r1) @@ -375,9 +374,7 @@ ret_from_mc_except:  exc_##n##_common:							    \  	std	r0,GPR0(r1);		/* save r0 in stackframe */	    \  	std	r2,GPR2(r1);		/* save r2 in stackframe */	    \ -	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe */    \ -	SAVE_2GPRS(7, r1);		/* save r7, r8 in stackframe */	    \ -	std	r9,GPR9(r1);		/* save r9 in stackframe */	    \ +	SAVE_GPRS(3, 9, r1);		/* save r3 - r9 in stackframe */    \  	std	r10,_NIP(r1);		/* save SRR0 to stackframe */	    \  	std	r11,_MSR(r1);		/* save SRR1 to stackframe */	    \  	beq	2f;			/* if from kernel mode */	    \ @@ -1061,9 +1058,7 @@ bad_stack_book3e:  	std	r11,_ESR(r1)  	std	r0,GPR0(r1);		/* save r0 in stackframe */	    \  	std	r2,GPR2(r1);		/* save r2 in stackframe */	    \ -	SAVE_4GPRS(3, r1);		/* save r3 - r6 in stackframe */    \ -	SAVE_2GPRS(7, r1);		/* save r7, r8 in stackframe */	    \ -	std	r9,GPR9(r1);		/* save r9 in stackframe */	    \ +	SAVE_GPRS(3, 9, r1);		/* save r3 - r9 in stackframe */    \  	ld	r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */		    \  	ld	r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */		    \  	mfspr	r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \ @@ -1077,8 +1072,7 @@ bad_stack_book3e:  	std	r10,_LINK(r1)  	std	r11,_CTR(r1)  	std	r12,_XER(r1) -	SAVE_10GPRS(14,r1) -	SAVE_8GPRS(24,r1) +	SAVE_GPRS(14, 31, r1)  	lhz	r12,PACA_TRAP_SAVE(r13)  	std	r12,_TRAP(r1)  	addi	r11,r1,INT_FRAME_SIZE diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eaf1f72131a1..55caeee37c08 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -48,7 +48,7 @@  	.balign IFETCH_ALIGN_BYTES;				\  	.global name;						\  	_ASM_NOKPROBE_SYMBOL(name);				\ -	DEFINE_FIXED_SYMBOL(name);				\ +	DEFINE_FIXED_SYMBOL(name, text);			\  name:  #define TRAMP_REAL_BEGIN(name)					\ @@ -76,31 +76,18 @@ name:  	ld	reg,PACAKBASE(r13);	/* get high part of &label */	\  	ori	reg,reg,FIXED_SYMBOL_ABS_ADDR(label) -#define __LOAD_HANDLER(reg, label)					\ +#define __LOAD_HANDLER(reg, label, section)					\  	ld	reg,PACAKBASE(r13);					\ -	ori	reg,reg,(ABS_ADDR(label))@l +	ori	reg,reg,(ABS_ADDR(label, section))@l  /*   * Branches from unrelocated code (e.g., interrupts) to labels outside   * head-y require >64K offsets.   */ -#define __LOAD_FAR_HANDLER(reg, label)					\ +#define __LOAD_FAR_HANDLER(reg, label, section)					\  	ld	reg,PACAKBASE(r13);					\ -	ori	reg,reg,(ABS_ADDR(label))@l;				\ -	addis	reg,reg,(ABS_ADDR(label))@h - -/* - * Branch to label using its 0xC000 address. This results in instruction - * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned - * on using mtmsr rather than rfid. - * - * This could set the 0xc bits for !RELOCATABLE as an immediate, rather than - * load KBASE for a slight optimisation. 
- */ -#define BRANCH_TO_C000(reg, label)					\ -	__LOAD_FAR_HANDLER(reg, label);					\ -	mtctr	reg;							\ -	bctr +	ori	reg,reg,(ABS_ADDR(label, section))@l;				\ +	addis	reg,reg,(ABS_ADDR(label, section))@h  /*   * Interrupt code generation macros @@ -111,9 +98,10 @@ name:  #define IAREA		.L_IAREA_\name\()	/* PACA save area */  #define IVIRT		.L_IVIRT_\name\()	/* Has virt mode entry point */  #define IISIDE		.L_IISIDE_\name\()	/* Uses SRR0/1 not DAR/DSISR */ +#define ICFAR		.L_ICFAR_\name\()	/* Uses CFAR */ +#define ICFAR_IF_HVMODE	.L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV */  #define IDAR		.L_IDAR_\name\()	/* Uses DAR (or SRR0) */  #define IDSISR		.L_IDSISR_\name\()	/* Uses DSISR (or SRR1) */ -#define ISET_RI		.L_ISET_RI_\name\()	/* Run common code w/ MSR[RI]=1 */  #define IBRANCH_TO_COMMON	.L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */  #define IREALMODE_COMMON	.L_IREALMODE_COMMON_\name\() /* Common runs in realmode */  #define IMASK		.L_IMASK_\name\()	/* IRQ soft-mask bit */ @@ -151,15 +139,18 @@ do_define_int n  	.ifndef IISIDE  		IISIDE=0  	.endif +	.ifndef ICFAR +		ICFAR=1 +	.endif +	.ifndef ICFAR_IF_HVMODE +		ICFAR_IF_HVMODE=0 +	.endif  	.ifndef IDAR  		IDAR=0  	.endif  	.ifndef IDSISR  		IDSISR=0  	.endif -	.ifndef ISET_RI -		ISET_RI=1 -	.endif  	.ifndef IBRANCH_TO_COMMON  		IBRANCH_TO_COMMON=1  	.endif @@ -291,9 +282,21 @@ BEGIN_FTR_SECTION  END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	HMT_MEDIUM  	std	r10,IAREA+EX_R10(r13)		/* save r10 - r12 */ +	.if ICFAR  BEGIN_FTR_SECTION  	mfspr	r10,SPRN_CFAR  END_FTR_SECTION_IFSET(CPU_FTR_CFAR) +	.elseif ICFAR_IF_HVMODE +BEGIN_FTR_SECTION +  BEGIN_FTR_SECTION_NESTED(69) +	mfspr	r10,SPRN_CFAR +  END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69) +FTR_SECTION_ELSE +  BEGIN_FTR_SECTION_NESTED(69) +	li	r10,0 +  END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69) +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +	.endif  	.if \ool  	.if !\virt  	b	tramp_real_\name @@ -309,9 +312,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)  BEGIN_FTR_SECTION  	std	r9,IAREA+EX_PPR(r13)  END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) +	.if ICFAR || ICFAR_IF_HVMODE  BEGIN_FTR_SECTION  	std	r10,IAREA+EX_CFAR(r13)  END_FTR_SECTION_IFSET(CPU_FTR_CFAR) +	.endif  	INTERRUPT_TO_KERNEL  	mfctr	r10  	std	r10,IAREA+EX_CTR(r13) @@ -376,7 +381,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)   * This switches to virtual mode and sets MSR[RI].   */  .macro __GEN_COMMON_ENTRY name -DEFINE_FIXED_SYMBOL(\name\()_common_real) +DEFINE_FIXED_SYMBOL(\name\()_common_real, text)  \name\()_common_real:  	.if IKVM_REAL  		KVMTEST \name kvm_interrupt @@ -399,7 +404,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)  	.endif  	.balign IFETCH_ALIGN_BYTES -DEFINE_FIXED_SYMBOL(\name\()_common_virt) +DEFINE_FIXED_SYMBOL(\name\()_common_virt, text)  \name\()_common_virt:  	.if IKVM_VIRT  		KVMTEST \name kvm_interrupt @@ -413,7 +418,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)   * want to run in real mode.   
*/  .macro __GEN_REALMODE_COMMON_ENTRY name -DEFINE_FIXED_SYMBOL(\name\()_common_real) +DEFINE_FIXED_SYMBOL(\name\()_common_real, text)  \name\()_common_real:  	.if IKVM_REAL  		KVMTEST \name kvm_interrupt @@ -512,11 +517,6 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)  	stb	r10,PACASRR_VALID(r13)  	.endif -	.if ISET_RI -	li	r10,MSR_RI -	mtmsrd	r10,1			/* Set MSR_RI */ -	.endif -  	.if ISTACK  	.if IKUAP  	kuap_save_amr_and_lock r9, r10, cr1, cr0 @@ -568,14 +568,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	.endif  BEGIN_FTR_SECTION +	.if ICFAR || ICFAR_IF_HVMODE  	ld	r10,IAREA+EX_CFAR(r13) +	.else +	li	r10,0 +	.endif  	std	r10,ORIG_GPR3(r1)  END_FTR_SECTION_IFSET(CPU_FTR_CFAR)  	ld	r10,IAREA+EX_CTR(r13)  	std	r10,_CTR(r1)  	std	r2,GPR2(r1)		/* save r2 in stackframe	*/ -	SAVE_4GPRS(3, r1)		/* save r3 - r6 in stackframe   */ -	SAVE_2GPRS(7, r1)		/* save r7, r8 in stackframe	*/ +	SAVE_GPRS(3, 8, r1)		/* save r3 - r8 in stackframe   */  	mflr	r9			/* Get LR, later save to stack	*/  	ld	r2,PACATOC(r13)		/* get kernel TOC into r2	*/  	std	r9,_LINK(r1) @@ -693,8 +696,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)  	mtlr	r9  	ld	r9,_CCR(r1)  	mtcr	r9 -	REST_8GPRS(2, r1) -	REST_4GPRS(10, r1) +	REST_GPRS(2, 13, r1)  	REST_GPR(0, r1)  	/* restore original r1. */  	ld	r1,GPR1(r1) @@ -850,12 +852,12 @@ SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000)  #ifdef CONFIG_RELOCATABLE  TRAMP_VIRT_BEGIN(system_call_vectored_tramp) -	__LOAD_HANDLER(r10, system_call_vectored_common) +	__LOAD_HANDLER(r10, system_call_vectored_common, virt_trampolines)  	mtctr	r10  	bctr  TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp) -	__LOAD_HANDLER(r10, system_call_vectored_sigill) +	__LOAD_HANDLER(r10, system_call_vectored_sigill, virt_trampolines)  	mtctr	r10  	bctr  #endif @@ -902,11 +904,6 @@ INT_DEFINE_BEGIN(system_reset)  	IVEC=0x100  	IAREA=PACA_EXNMI  	IVIRT=0 /* no virt entry point */ -	/* -	 * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is -	 * being used, so a nested NMI exception would corrupt it. -	 */ -	ISET_RI=0  	ISTACK=0  	IKVM_REAL=1  INT_DEFINE_END(system_reset) @@ -964,7 +961,9 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)  	/* We are waking up from idle, so may clobber any volatile register */  	cmpwi	cr1,r5,2  	bltlr	cr1	/* no state loss, return to idle caller with r3=SRR1 */ -	BRANCH_TO_C000(r12, DOTSYM(idle_return_gpr_loss)) +	__LOAD_FAR_HANDLER(r12, DOTSYM(idle_return_gpr_loss), real_trampolines) +	mtctr	r12 +	bctr  #endif  #ifdef CONFIG_PPC_PSERIES @@ -979,16 +978,14 @@ TRAMP_REAL_BEGIN(system_reset_fwnmi)  EXC_COMMON_BEGIN(system_reset_common)  	__GEN_COMMON_ENTRY system_reset  	/* -	 * Increment paca->in_nmi then enable MSR_RI. SLB or MCE will be able -	 * to recover, but nested NMI will notice in_nmi and not recover -	 * because of the use of the NMI stack. in_nmi reentrancy is tested in -	 * system_reset_exception. +	 * Increment paca->in_nmi. When the interrupt entry wrapper later +	 * enable MSR_RI, then SLB or MCE will be able to recover, but a nested +	 * NMI will notice in_nmi and not recover because of the use of the NMI +	 * stack. in_nmi reentrancy is tested in system_reset_exception.  	 */  	lhz	r10,PACA_IN_NMI(r13)  	addi	r10,r10,1  	sth	r10,PACA_IN_NMI(r13) -	li	r10,MSR_RI -	mtmsrd 	r10,1  	mr	r10,r1  	ld	r1,PACA_NMI_EMERG_SP(r13) @@ -1062,12 +1059,6 @@ INT_DEFINE_BEGIN(machine_check_early)  	IAREA=PACA_EXMC  	IVIRT=0 /* no virt entry point */  	IREALMODE_COMMON=1 -	/* -	 * MSR_RI is not enabled, because PACA_EXMC is being used, so a -	 * nested machine check corrupts it. 
machine_check_common enables -	 * MSR_RI. -	 */ -	ISET_RI=0  	ISTACK=0  	IDAR=1  	IDSISR=1 @@ -1078,7 +1069,6 @@ INT_DEFINE_BEGIN(machine_check)  	IVEC=0x200  	IAREA=PACA_EXMC  	IVIRT=0 /* no virt entry point */ -	ISET_RI=0  	IDAR=1  	IDSISR=1  	IKVM_REAL=1 @@ -1148,9 +1138,6 @@ EXC_COMMON_BEGIN(machine_check_early_common)  BEGIN_FTR_SECTION  	bl	enable_machine_check  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) -	li	r10,MSR_RI -	mtmsrd	r10,1 -  	addi	r3,r1,STACK_FRAME_OVERHEAD  	bl	machine_check_early  	std	r3,RESULT(r1)	/* Save result */ @@ -1238,10 +1225,6 @@ EXC_COMMON_BEGIN(machine_check_common)  	 * save area: PACA_EXMC instead of PACA_EXGEN.  	 */  	GEN_COMMON machine_check - -	/* Enable MSR_RI when finished with PACA_EXMC */ -	li	r10,MSR_RI -	mtmsrd 	r10,1  	addi	r3,r1,STACK_FRAME_OVERHEAD  	bl	machine_check_exception_async  	b	interrupt_return_srr @@ -1369,11 +1352,15 @@ EXC_COMMON_BEGIN(data_access_common)  	addi	r3,r1,STACK_FRAME_OVERHEAD  	andis.	r0,r4,DSISR_DABRMATCH@h  	bne-	1f +#ifdef CONFIG_PPC_64S_HASH_MMU  BEGIN_MMU_FTR_SECTION  	bl	do_hash_fault  MMU_FTR_SECTION_ELSE  	bl	do_page_fault  ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else +	bl	do_page_fault +#endif  	b	interrupt_return_srr  1:	bl	do_break @@ -1416,6 +1403,7 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)  EXC_VIRT_END(data_access_slb, 0x4380, 0x80)  EXC_COMMON_BEGIN(data_access_slb_common)  	GEN_COMMON data_access_slb +#ifdef CONFIG_PPC_64S_HASH_MMU  BEGIN_MMU_FTR_SECTION  	/* HPT case, do SLB fault */  	addi	r3,r1,STACK_FRAME_OVERHEAD @@ -1428,9 +1416,12 @@ MMU_FTR_SECTION_ELSE  	/* Radix case, access is outside page table range */  	li	r3,-EFAULT  ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else +	li	r3,-EFAULT +#endif  	std	r3,RESULT(r1)  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	do_bad_slb_fault +	bl	do_bad_segment_interrupt  	b	interrupt_return_srr @@ -1462,11 +1453,15 @@ EXC_VIRT_END(instruction_access, 0x4400, 0x80)  EXC_COMMON_BEGIN(instruction_access_common)  	GEN_COMMON instruction_access  	addi	r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_64S_HASH_MMU  BEGIN_MMU_FTR_SECTION  	bl	do_hash_fault  MMU_FTR_SECTION_ELSE  	bl	do_page_fault  ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else +	bl	do_page_fault +#endif  	b	interrupt_return_srr @@ -1496,6 +1491,7 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)  EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)  EXC_COMMON_BEGIN(instruction_access_slb_common)  	GEN_COMMON instruction_access_slb +#ifdef CONFIG_PPC_64S_HASH_MMU  BEGIN_MMU_FTR_SECTION  	/* HPT case, do SLB fault */  	addi	r3,r1,STACK_FRAME_OVERHEAD @@ -1508,9 +1504,12 @@ MMU_FTR_SECTION_ELSE  	/* Radix case, access is outside page table range */  	li	r3,-EFAULT  ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) +#else +	li	r3,-EFAULT +#endif  	std	r3,RESULT(r1)  	addi	r3,r1,STACK_FRAME_OVERHEAD -	bl	do_bad_slb_fault +	bl	do_bad_segment_interrupt  	b	interrupt_return_srr @@ -1536,6 +1535,12 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)   *   * If soft masked, the masked handler will note the pending interrupt for   * replay, and clear MSR[EE] in the interrupted context. + * + * CFAR is not required because this is an asynchronous interrupt that in + * general won't have much bearing on the state of the CPU, with the possible + * exception of crash/debug IPIs, but those are generally moving to use SRESET + * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case + * it may be exiting the guest and need CFAR to be saved.   
*/  INT_DEFINE_BEGIN(hardware_interrupt)  	IVEC=0x500 @@ -1543,6 +1548,10 @@ INT_DEFINE_BEGIN(hardware_interrupt)  	IMASK=IRQS_DISABLED  	IKVM_REAL=1  	IKVM_VIRT=1 +	ICFAR=0 +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +	ICFAR_IF_HVMODE=1 +#endif  INT_DEFINE_END(hardware_interrupt)  EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) @@ -1764,6 +1773,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)   * If PPC_WATCHDOG is configured, the soft masked handler will actually set   * things back up to run soft_nmi_interrupt as a regular interrupt handler   * on the emergency stack. + * + * CFAR is not required because this is asynchronous (see hardware_interrupt). + * A watchdog interrupt may like to have CFAR, but usually the interesting + * branch is long gone by that point (e.g., infinite loop).   */  INT_DEFINE_BEGIN(decrementer)  	IVEC=0x900 @@ -1771,6 +1784,7 @@ INT_DEFINE_BEGIN(decrementer)  #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE  	IKVM_REAL=1  #endif +	ICFAR=0  INT_DEFINE_END(decrementer)  EXC_REAL_BEGIN(decrementer, 0x900, 0x80) @@ -1846,6 +1860,8 @@ EXC_COMMON_BEGIN(hdecrementer_common)   * If soft masked, the masked handler will note the pending interrupt for   * replay, leaving MSR[EE] enabled in the interrupted context because the   * doorbells are edge triggered. + * + * CFAR is not required, similarly to hardware_interrupt.   */  INT_DEFINE_BEGIN(doorbell_super)  	IVEC=0xa00 @@ -1853,6 +1869,7 @@ INT_DEFINE_BEGIN(doorbell_super)  #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE  	IKVM_REAL=1  #endif +	ICFAR=0  INT_DEFINE_END(doorbell_super)  EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100) @@ -1904,6 +1921,7 @@ INT_DEFINE_BEGIN(system_call)  	IVEC=0xc00  	IKVM_REAL=1  	IKVM_VIRT=1 +	ICFAR=0  INT_DEFINE_END(system_call)  .macro SYSTEM_CALL virt @@ -1942,12 +1960,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)  	HMT_MEDIUM  	.if ! \virt -	__LOAD_HANDLER(r10, system_call_common_real) +	__LOAD_HANDLER(r10, system_call_common_real, real_vectors)  	mtctr	r10  	bctr  	.else  #ifdef CONFIG_RELOCATABLE -	__LOAD_HANDLER(r10, system_call_common) +	__LOAD_HANDLER(r10, system_call_common, virt_vectors)  	mtctr	r10  	bctr  #else @@ -2001,7 +2019,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	 * Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives  	 * outside the head section.  	 */ -	__LOAD_FAR_HANDLER(r10, kvmppc_hcall) +	__LOAD_FAR_HANDLER(r10, kvmppc_hcall, real_trampolines)  	mtctr   r10  	bctr  #else @@ -2202,6 +2220,11 @@ EXC_COMMON_BEGIN(hmi_exception_common)   * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.   * This is an asynchronous interrupt in response to a msgsnd doorbell.   * Similar to the 0xa00 doorbell but for host rather than guest. + * + * CFAR is not required (similar to doorbell_interrupt), unless KVM HV + * is enabled, in which case it may be a guest exit. Most PowerNV kernels + * include KVM support so it would be nice if this could be dynamically + * patched out if KVM was not currently running any guests.   */  INT_DEFINE_BEGIN(h_doorbell)  	IVEC=0xe80 @@ -2209,6 +2232,9 @@ INT_DEFINE_BEGIN(h_doorbell)  	IMASK=IRQS_DISABLED  	IKVM_REAL=1  	IKVM_VIRT=1 +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE +	ICFAR=0 +#endif  INT_DEFINE_END(h_doorbell)  EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20) @@ -2232,6 +2258,9 @@ EXC_COMMON_BEGIN(h_doorbell_common)   * Interrupt 0xea0 - Hypervisor Virtualization Interrupt.   * This is an asynchronous interrupt in response to an "external exception".   * Similar to 0x500 but for host only. 
+ * + * Like h_doorbell, CFAR is only required for KVM HV because this can be + * a guest exit.   */  INT_DEFINE_BEGIN(h_virt_irq)  	IVEC=0xea0 @@ -2239,6 +2268,9 @@ INT_DEFINE_BEGIN(h_virt_irq)  	IMASK=IRQS_DISABLED  	IKVM_REAL=1  	IKVM_VIRT=1 +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE +	ICFAR=0 +#endif  INT_DEFINE_END(h_virt_irq)  EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20) @@ -2275,6 +2307,8 @@ EXC_VIRT_NONE(0x4ee0, 0x20)   *   * If soft masked, the masked handler will note the pending interrupt for   * replay, and clear MSR[EE] in the interrupted context. + * + * CFAR is not used by perf interrupts so not required.   */  INT_DEFINE_BEGIN(performance_monitor)  	IVEC=0xf00 @@ -2282,6 +2316,7 @@ INT_DEFINE_BEGIN(performance_monitor)  #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE  	IKVM_REAL=1  #endif +	ICFAR=0  INT_DEFINE_END(performance_monitor)  EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20) @@ -2706,6 +2741,7 @@ EXC_VIRT_NONE(0x5800, 0x100)  INT_DEFINE_BEGIN(soft_nmi)  	IVEC=0x900  	ISTACK=0 +	ICFAR=0  INT_DEFINE_END(soft_nmi)  /* @@ -3025,7 +3061,7 @@ USE_FIXED_SECTION(virt_trampolines)  	.align	7  	.globl	__end_interrupts  __end_interrupts: -DEFINE_FIXED_SYMBOL(__end_interrupts) +DEFINE_FIXED_SYMBOL(__end_interrupts, virt_trampolines)  CLOSE_FIXED_SECTION(real_vectors);  CLOSE_FIXED_SECTION(real_trampolines); diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index b7ceb041743c..d03e488cfe9c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -251,7 +251,7 @@ bool is_fadump_reserved_mem_contiguous(void)  }  /* Print firmware assisted dump configurations for debugging purpose. */ -static void fadump_show_config(void) +static void __init fadump_show_config(void)  {  	int i; @@ -353,7 +353,7 @@ static __init u64 fadump_calculate_reserve_size(void)   * Calculate the total memory size required to be reserved for   * firmware-assisted dump registration.   */ -static unsigned long get_fadump_area_size(void) +static unsigned long __init get_fadump_area_size(void)  {  	unsigned long size = 0; @@ -462,7 +462,7 @@ static int __init fadump_get_boot_mem_regions(void)   * with the given memory range.   * False, otherwise.   */ -static bool overlaps_reserved_ranges(u64 base, u64 end, int *idx) +static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx)  {  	bool ret = false;  	int i; @@ -737,7 +737,7 @@ void crash_fadump(struct pt_regs *regs, const char *str)  	fw_dump.ops->fadump_trigger(fdh, str);  } -u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) +u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)  {  	struct elf_prstatus prstatus; @@ -752,7 +752,7 @@ u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)  	return buf;  } -void fadump_update_elfcore_header(char *bufp) +void __init fadump_update_elfcore_header(char *bufp)  {  	struct elf_phdr *phdr; @@ -770,7 +770,7 @@ void fadump_update_elfcore_header(char *bufp)  	return;  } -static void *fadump_alloc_buffer(unsigned long size) +static void *__init fadump_alloc_buffer(unsigned long size)  {  	unsigned long count, i;  	struct page *page; @@ -792,7 +792,7 @@ static void fadump_free_buffer(unsigned long vaddr, unsigned long size)  	free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);  } -s32 fadump_setup_cpu_notes_buf(u32 num_cpus) +s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus)  {  	/* Allocate buffer to hold cpu crash notes. 
*/  	fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); @@ -1447,7 +1447,7 @@ static ssize_t release_mem_store(struct kobject *kobj,  }  /* Release the reserved memory and disable the FADump */ -static void unregister_fadump(void) +static void __init unregister_fadump(void)  {  	fadump_cleanup();  	fadump_release_memory(fw_dump.reserve_dump_area_start, @@ -1547,7 +1547,7 @@ ATTRIBUTE_GROUPS(fadump);  DEFINE_SHOW_ATTRIBUTE(fadump_region); -static void fadump_init_files(void) +static void __init fadump_init_files(void)  {  	int rc = 0; @@ -1641,6 +1641,14 @@ int __init setup_fadump(void)  	else if (fw_dump.reserve_dump_area_size)  		fw_dump.ops->fadump_init_mem_struct(&fw_dump); +	/* +	 * In case of panic, fadump is triggered via ppc_panic_event() +	 * panic notifier. Setting crash_kexec_post_notifiers to 'true' +	 * lets panic() function take crash friendly path before panic +	 * notifiers are invoked. +	 */ +	crash_kexec_post_notifiers = true; +  	return 1;  }  subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index ba4afe3b5a9c..f71f2bbd4de6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -81,7 +81,12 @@ EXPORT_SYMBOL(store_fp_state)   */  _GLOBAL(load_up_fpu)  	mfmsr	r5 +#ifdef CONFIG_PPC_BOOK3S_64 +	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */ +	ori	r5,r5,MSR_FP|MSR_RI +#else  	ori	r5,r5,MSR_FP +#endif  #ifdef CONFIG_VSX  BEGIN_FTR_SECTION  	oris	r5,r5,MSR_VSX@h diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index 349c4a820231..c3286260a7d1 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -115,8 +115,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)  	stw	r10,8(r1)  	li	r10, \trapno  	stw	r10,_TRAP(r1) -	SAVE_4GPRS(3, r1) -	SAVE_2GPRS(7, r1) +	SAVE_GPRS(3, 8, r1)  	SAVE_NVGPRS(r1)  	stw	r2,GPR2(r1)  	stw	r12,_NIP(r1) @@ -136,6 +135,12 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)  	andi.	r12,r9,MSR_PR  	bne	777f  	bl	prepare_transfer_to_handler +#ifdef CONFIG_PPC_KUEP +	b	778f +777: +	bl	__kuep_lock +778: +#endif  777:  #endif  .endm diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 7d72ee5ab387..b6c6d1de5fd5 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -27,6 +27,7 @@  #include <linux/init.h>  #include <linux/pgtable.h> +#include <linux/sizes.h>  #include <asm/processor.h>  #include <asm/page.h>  #include <asm/mmu.h> @@ -297,6 +298,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)  3:  	mfspr	r11,SPRN_SPRG_THREAD  	lwz	r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP +	rlwinm.	r9, r9, 0, 0xff +	beq	5f			/* Kuap fault */ +#endif  4:  	tophys(r11, r11)  	rlwimi	r11, r10, 12, 20, 29	/* Create L1 (pgdir/pmd) address */ @@ -377,6 +382,10 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)  3:  	mfspr	r11,SPRN_SPRG_THREAD  	lwz	r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP +	rlwinm.	r9, r9, 0, 0xff +	beq	5f			/* Kuap fault */ +#endif  4:  	tophys(r11, r11)  	rlwimi	r11, r10, 12, 20, 29	/* Create L1 (pgdir/pmd) address */ @@ -650,7 +659,7 @@ start_here:  	b	.		/* prevent prefetch past rfi */  /* Set up the initial MMU state so we can do the first level of - * kernel initialization.  This maps the first 16 MBytes of memory 1:1 + * kernel initialization.  This maps the first 32 MBytes of memory 1:1   * virtual to physical and more importantly sets the cache mode.   
*/  initial_mmu: @@ -687,6 +696,12 @@ initial_mmu:  	tlbwe	r4,r0,TLB_DATA		/* Load the data portion of the entry */  	tlbwe	r3,r0,TLB_TAG		/* Load the tag portion of the entry */ +	li	r0,62			/* TLB slot 62 */ +	addis	r4,r4,SZ_16M@h +	addis	r3,r3,SZ_16M@h +	tlbwe	r4,r0,TLB_DATA		/* Load the data portion of the entry */ +	tlbwe	r3,r0,TLB_TAG		/* Load the tag portion of the entry */ +  	isync  	/* Establish the exception vector base diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 02d2928d1e01..b73a56466903 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -334,6 +334,10 @@ interrupt_base:  	mfspr	r12,SPRN_MMUCR  	mfspr   r13,SPRN_PID		/* Get PID */  	rlwimi	r12,r13,0,24,31		/* Set TID */ +#ifdef CONFIG_PPC_KUAP +	cmpwi	r13,0 +	beq	2f			/* KUAP Fault */ +#endif  4:  	mtspr	SPRN_MMUCR,r12 @@ -444,6 +448,10 @@ interrupt_base:  	mfspr	r12,SPRN_MMUCR  	mfspr   r13,SPRN_PID		/* Get PID */  	rlwimi	r12,r13,0,24,31		/* Set TID */ +#ifdef CONFIG_PPC_KUAP +	cmpwi	r13,0 +	beq	2f			/* KUAP Fault */ +#endif  4:  	mtspr	SPRN_MMUCR,r12 @@ -532,10 +540,7 @@ finish_tlb_load_44x:  	andi.	r10,r12,_PAGE_USER		/* User page ? */  	beq	1f				/* nope, leave U bits empty */  	rlwimi	r11,r11,3,26,28			/* yes, copy S bits to U */ -#ifdef CONFIG_PPC_KUEP -0:	rlwinm	r11,r11,0,~PPC44x_TLB_SX	/* Clear SX if User page */ -	patch_site 0b, patch__tlb_44x_kuep -#endif +	rlwinm	r11,r11,0,~PPC44x_TLB_SX	/* Clear SX if User page */  1:	tlbwe	r11,r13,PPC44x_TLB_ATTRIB	/* Write ATTRIB */  	/* Done...restore registers and get out of here. @@ -575,6 +580,10 @@ finish_tlb_load_44x:  3:	mfspr	r11,SPRN_SPRG3  	lwz	r11,PGDIR(r11)  	mfspr   r12,SPRN_PID		/* Get PID */ +#ifdef CONFIG_PPC_KUAP +	cmpwi	r12,0 +	beq	2f			/* KUAP Fault */ +#endif  4:	mtspr	SPRN_MMUCR,r12		/* Set MMUCR */  	/* Mask of required permission bits. Note that while we @@ -672,6 +681,10 @@ finish_tlb_load_44x:  3:	mfspr	r11,SPRN_SPRG_THREAD  	lwz	r11,PGDIR(r11)  	mfspr   r12,SPRN_PID		/* Get PID */ +#ifdef CONFIG_PPC_KUAP +	cmpwi	r12,0 +	beq	2f			/* KUAP Fault */ +#endif  4:	mtspr	SPRN_MMUCR,r12		/* Set MMUCR */  	/* Make up the required permissions */ @@ -747,10 +760,7 @@ finish_tlb_load_47x:  	andi.	r10,r12,_PAGE_USER		/* User page ? */  	beq	1f				/* nope, leave U bits empty */  	rlwimi	r11,r11,3,26,28			/* yes, copy S bits to U */ -#ifdef CONFIG_PPC_KUEP -0:	rlwinm	r11,r11,0,~PPC47x_TLB2_SX	/* Clear SX if User page */ -	patch_site 0b, patch__tlb_47x_kuep -#endif +	rlwinm	r11,r11,0,~PPC47x_TLB2_SX	/* Clear SX if User page */  1:	tlbwe	r11,r13,2  	/* Done...restore registers and get out of here. diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index f17ae2083733..5c5181e8d5f1 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -126,7 +126,7 @@ __secondary_hold_acknowledge:  	. = 0x5c  	.globl	__run_at_load  __run_at_load: -DEFINE_FIXED_SYMBOL(__run_at_load) +DEFINE_FIXED_SYMBOL(__run_at_load, first_256B)  	.long	RUN_AT_LOAD_DEFAULT  #endif @@ -156,7 +156,7 @@ __secondary_hold:  	/* Tell the master cpu we're here */  	/* Relocation is off & we are located at an address less */  	/* than 0x100, so only need to grab low order offset.    */ -	std	r24,(ABS_ADDR(__secondary_hold_acknowledge))(0) +	std	r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0)  	sync  	li	r26,0 @@ -164,7 +164,7 @@ __secondary_hold:  	tovirt(r26,r26)  #endif  	/* All secondary cpus wait here until told to start. 
*/ -100:	ld	r12,(ABS_ADDR(__secondary_hold_spinloop))(r26) +100:	ld	r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(r26)  	cmpdi	0,r12,0  	beq	100b @@ -649,15 +649,15 @@ __after_prom_start:  3:  #endif  	/* # bytes of memory to copy */ -	lis	r5,(ABS_ADDR(copy_to_here))@ha -	addi	r5,r5,(ABS_ADDR(copy_to_here))@l +	lis	r5,(ABS_ADDR(copy_to_here, text))@ha +	addi	r5,r5,(ABS_ADDR(copy_to_here, text))@l  	bl	copy_and_flush		/* copy the first n bytes	 */  					/* this includes the code being	 */  					/* executed here.		 */  	/* Jump to the copy of this code that we just made */ -	addis	r8,r3,(ABS_ADDR(4f))@ha -	addi	r12,r8,(ABS_ADDR(4f))@l +	addis	r8,r3,(ABS_ADDR(4f, text))@ha +	addi	r12,r8,(ABS_ADDR(4f, text))@l  	mtctr	r12  	bctr @@ -669,8 +669,8 @@ p_end: .8byte _end - copy_to_here  	 * Now copy the rest of the kernel up to _end, add  	 * _end - copy_to_here to the copy limit and run again.  	 */ -	addis   r8,r26,(ABS_ADDR(p_end))@ha -	ld      r8,(ABS_ADDR(p_end))@l(r8) +	addis   r8,r26,(ABS_ADDR(p_end, text))@ha +	ld      r8,(ABS_ADDR(p_end, text))@l(r8)  	add	r5,r5,r8  5:	bl	copy_and_flush		/* copy the rest */ @@ -904,7 +904,7 @@ _GLOBAL(relative_toc)  	blr  .balign 8 -p_toc:	.8byte	__toc_start + 0x8000 - 0b +p_toc:	.8byte	.TOC. - 0b  /*   * This is where the main kernel code starts. diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 68e5c0a7e99d..fa84744d6b24 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -931,7 +931,11 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)  _GLOBAL(load_segment_registers)  	li	r0, NUM_USER_SEGMENTS /* load up user segment register values */  	mtctr	r0		/* for context 0 */ +#ifdef CONFIG_PPC_KUEP +	lis	r3, SR_NX@h	/* Kp = 0, Ks = 0, VSID = 0 */ +#else  	li	r3, 0		/* Kp = 0, Ks = 0, VSID = 0 */ +#endif  	li	r4, 0  3:	mtsrin	r3, r4  	addi	r3, r3, 0x111	/* increment VSID */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index ef8d1b1c234e..bb6d5d0fc4ac 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -87,8 +87,7 @@ END_BTB_FLUSH_SECTION  	stw	r10, 8(r1)  	li	r10, \trapno  	stw	r10,_TRAP(r1) -	SAVE_4GPRS(3, r1) -	SAVE_2GPRS(7, r1) +	SAVE_GPRS(3, 8, r1)  	SAVE_NVGPRS(r1)  	stw	r2,GPR2(r1)  	stw	r12,_NIP(r1) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 0a9a0f301474..ac2b4dcf5fd3 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -462,6 +462,12 @@ END_BTB_FLUSH_SECTION  	mfspr	r11,SPRN_SPRG_THREAD  	lwz	r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP +	mfspr	r12, SPRN_MAS1 +	rlwinm.	r12,r12,0,0x3fff0000 +	beq	2f			/* KUAP fault */ +#endif +  4:  	/* Mask of required permission bits. Note that while we  	 * do copy ESR:ST to _PAGE_RW position as trying to write @@ -571,6 +577,12 @@ END_BTB_FLUSH_SECTION  	mfspr	r11,SPRN_SPRG_THREAD  	lwz	r11,PGDIR(r11) +#ifdef CONFIG_PPC_KUAP +	mfspr	r12, SPRN_MAS1 +	rlwinm.	r12,r12,0,0x3fff0000 +	beq	2f			/* KUAP fault */ +#endif +  	/* Make up the required permissions for user code */  #ifdef CONFIG_PTE_64BIT  	li	r13,_PAGE_PRESENT | _PAGE_BAP_UX @@ -777,6 +789,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)  	andi.	
r10, r11, _PAGE_USER	/* Test for _PAGE_USER */  	slwi	r10, r12, 1  	or	r10, r10, r12 +	rlwinm	r10, r10, 0, ~_PAGE_EXEC	/* Clear SX on user pages */  	iseleq	r12, r12, r10  	rlwimi	r13, r12, 0, 20, 31	/* Get RPN from PTE, merge w/ perms */  	mtspr	SPRN_MAS3, r13 diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 91a3be14808b..2669f80b3a49 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -523,7 +523,7 @@ static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info  static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,  			     struct arch_hw_breakpoint **info, int *hit, -			     struct ppc_inst instr) +			     ppc_inst_t instr)  {  	int i;  	int stepped; @@ -616,7 +616,7 @@ int hw_breakpoint_handler(struct die_args *args)  	int hit[HBP_NUM_MAX] = {0};  	int nr_hit = 0;  	bool ptrace_bp = false; -	struct ppc_inst instr = ppc_inst(0); +	ppc_inst_t instr = ppc_inst(0);  	int type = 0;  	int size = 0;  	unsigned long ea; diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c index 42b967e3d85c..a74623025f3a 100644 --- a/arch/powerpc/kernel/hw_breakpoint_constraints.c +++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c @@ -80,7 +80,7 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type,   * Return true if the event is valid wrt dawr configuration,   * including extraneous exception. Otherwise return false.   */ -bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr, +bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,  			  unsigned long ea, int type, int size,  			  struct arch_hw_breakpoint *info)  { @@ -127,7 +127,7 @@ bool wp_check_constraints(struct pt_regs *regs, struct ppc_inst instr,  	return false;  } -void wp_get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr, +void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,  			 int *type, int *size, unsigned long *ea)  {  	struct instruction_op op; diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 1f835539fda4..4ad79eb638c6 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -82,7 +82,7 @@ void power4_idle(void)  		return;  	if (cpu_has_feature(CPU_FTR_ALTIVEC)) -		asm volatile("DSSALL ; sync" ::: "memory"); +		asm volatile(PPC_DSSALL " ; sync" ::: "memory");  	power4_idle_nap(); diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S index 13cad9297d82..3c097356366b 100644 --- a/arch/powerpc/kernel/idle_6xx.S +++ b/arch/powerpc/kernel/idle_6xx.S @@ -129,7 +129,7 @@ BEGIN_FTR_SECTION  END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)  	mtspr	SPRN_HID0,r4  BEGIN_FTR_SECTION -	DSSALL +	PPC_DSSALL  	sync  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  	lwz	r8,TI_LOCAL_FLAGS(r2)	/* set napping bit */ diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 835b626cd476..7cd6ce3ec423 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -81,7 +81,7 @@ notrace long system_call_exception(long r3, long r4, long r5,  {  	syscall_fn f; -	kuep_lock(); +	kuap_lock();  	regs->orig_gpr3 = r3; @@ -148,7 +148,7 @@ notrace long system_call_exception(long r3, long r4, long r5,  	 */  	if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&  			unlikely(MSR_TM_TRANSACTIONAL(regs->msr))) -		current_thread_info()->flags |= _TIF_RESTOREALL; +		set_bits(_TIF_RESTOREALL, ¤t_thread_info()->flags);  	/*  	 * If the 
system call was made with a transaction active, doom it and @@ -181,7 +181,7 @@ notrace long system_call_exception(long r3, long r4, long r5,  	local_irq_enable(); -	if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) { +	if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {  		if (unlikely(trap_is_unsupported_scv(regs))) {  			/* Unsupported scv vector */  			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip); @@ -343,7 +343,7 @@ interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)  	unsigned long ti_flags;  again: -	ti_flags = READ_ONCE(current_thread_info()->flags); +	ti_flags = read_thread_flags();  	while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {  		local_irq_enable();  		if (ti_flags & _TIF_NEED_RESCHED) { @@ -359,7 +359,7 @@ again:  			do_notify_resume(regs, ti_flags);  		}  		local_irq_disable(); -		ti_flags = READ_ONCE(current_thread_info()->flags); +		ti_flags = read_thread_flags();  	}  	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) { @@ -406,7 +406,6 @@ again:  	/* Restore user access locks last */  	kuap_user_restore(regs); -	kuep_unlock();  	return ret;  } @@ -437,7 +436,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,  	/* Check whether the syscall is issued inside a restartable sequence */  	rseq_syscall(regs); -	ti_flags = current_thread_info()->flags; +	ti_flags = read_thread_flags();  	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {  		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) { @@ -532,8 +531,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)  	unsigned long flags;  	unsigned long ret = 0;  	unsigned long kuap; -	bool stack_store = current_thread_info()->flags & -						_TIF_EMULATE_STACK_STORE; +	bool stack_store = read_thread_flags() & _TIF_EMULATE_STACK_STORE;  	if (regs_is_unrecoverable(regs))  		unrecoverable_exception(regs); @@ -554,7 +552,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)  again:  		if (IS_ENABLED(CONFIG_PREEMPT)) {  			/* Return to preemptible kernel context */ -			if (unlikely(current_thread_info()->flags & _TIF_NEED_RESCHED)) { +			if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {  				if (preempt_count() == 0)  					preempt_schedule_irq();  			} diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index ec950b08a8dc..92088f848266 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -30,21 +30,23 @@ COMPAT_SYS_CALL_TABLE:  	.ifc \srr,srr  	mfspr	r11,SPRN_SRR0  	ld	r12,_NIP(r1) +	clrrdi  r12,r12,2  100:	tdne	r11,r12 -	EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) +	EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)  	mfspr	r11,SPRN_SRR1  	ld	r12,_MSR(r1)  100:	tdne	r11,r12 -	EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) +	EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)  	.else  	mfspr	r11,SPRN_HSRR0  	ld	r12,_NIP(r1) +	clrrdi  r12,r12,2  100:	tdne	r11,r12 -	EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) +	EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)  	mfspr	r11,SPRN_HSRR1  	ld	r12,_MSR(r1)  100:	tdne	r11,r12 -	EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) +	EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)  	.endif  #endif  .endm @@ -162,10 +164,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	 
* The value of AMR only matters while we're in the kernel.  	 */  	mtcr	r2 -	ld	r2,GPR2(r1) -	ld	r3,GPR3(r1) -	ld	r13,GPR13(r1) -	ld	r1,GPR1(r1) +	REST_GPRS(2, 3, r1) +	REST_GPR(13, r1) +	REST_GPR(1, r1)  	RFSCV_TO_USER  	b	.	/* prevent speculative execution */ @@ -183,9 +184,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	mtctr	r3  	mtlr	r4  	mtspr	SPRN_XER,r5 -	REST_10GPRS(2, r1) -	REST_2GPRS(12, r1) -	ld	r1,GPR1(r1) +	REST_GPRS(2, 13, r1) +	REST_GPR(1, r1)  	RFI_TO_USER  .Lsyscall_vectored_\name\()_rst_end: @@ -374,10 +374,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	 * The value of AMR only matters while we're in the kernel.  	 */  	mtcr	r2 -	ld	r2,GPR2(r1) -	ld	r3,GPR3(r1) -	ld	r13,GPR13(r1) -	ld	r1,GPR1(r1) +	REST_GPRS(2, 3, r1) +	REST_GPR(13, r1) +	REST_GPR(1, r1)  	RFI_TO_USER  	b	.	/* prevent speculative execution */ @@ -388,8 +387,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	mtctr	r3  	mtspr	SPRN_XER,r4  	ld	r0,GPR0(r1) -	REST_8GPRS(4, r1) -	ld	r12,GPR12(r1) +	REST_GPRS(4, 12, r1)  	b	.Lsyscall_restore_regs_cont  .Lsyscall_rst_end: @@ -518,17 +516,14 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)  	ld	r6,_XER(r1)  	li	r0,0 -	REST_4GPRS(7, r1) -	REST_2GPRS(11, r1) -	REST_GPR(13, r1) +	REST_GPRS(7, 13, r1)  	mtcr	r3  	mtlr	r4  	mtctr	r5  	mtspr	SPRN_XER,r6 -	REST_4GPRS(2, r1) -	REST_GPR(6, r1) +	REST_GPRS(2, 6, r1)  	REST_GPR(0, r1)  	REST_GPR(1, r1)  	.ifc \srr,srr @@ -625,8 +620,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)  	ld	r6,_CCR(r1)  	li	r0,0 -	REST_4GPRS(7, r1) -	REST_2GPRS(11, r1) +	REST_GPRS(7, 12, r1)  	mtlr	r3  	mtctr	r4 @@ -638,7 +632,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)  	 */  	std	r0,STACK_FRAME_OVERHEAD-16(r1) -	REST_4GPRS(2, r1) +	REST_GPRS(2, 5, r1)  	bne-	cr1,1f /* emulate stack store */  	mtcr	r6 @@ -703,7 +697,7 @@ interrupt_return_macro hsrr  	.globl __end_soft_masked  __end_soft_masked: -DEFINE_FIXED_SYMBOL(__end_soft_masked) +DEFINE_FIXED_SYMBOL(__end_soft_masked, text)  #endif /* CONFIG_PPC_BOOK3S */  #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c4f1d6b7d992..2cf31a97126c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -745,7 +745,8 @@ void __do_irq(struct pt_regs *regs)  	irq = ppc_md.get_irq();  	/* We can hard enable interrupts now to allow perf interrupts */ -	may_hard_irq_enable(); +	if (should_hard_irq_enable()) +		do_hard_irq_enable();  	/* And finally process it */  	if (unlikely(!irq)) @@ -811,7 +812,7 @@ void __init init_IRQ(void)  		ppc_md.init_IRQ();  } -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x  void   *critirq_ctx[NR_CPUS] __read_mostly;  void    *dbgirq_ctx[NR_CPUS] __read_mostly;  void *mcheckirq_ctx[NR_CPUS] __read_mostly; diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index bdee7262c080..9f8d0fa7b718 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -48,7 +48,7 @@ static struct hard_trap_info  	{ 0x0800, 0x08 /* SIGFPE */  },		/* fp unavailable */  	{ 0x0900, 0x0e /* SIGALRM */ },		/* decrementer */  	{ 0x0c00, 0x14 /* SIGCHLD */ },		/* system call */ -#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +#ifdef CONFIG_BOOKE_OR_40x  	{ 0x2002, 0x05 /* SIGTRAP */ },		/* debug */  #if defined(CONFIG_FSL_BOOKE)  	{ 0x2010, 0x08 /* SIGFPE */  },		/* spe unavailable */ @@ -67,7 +67,7 @@ static struct hard_trap_info  	{ 0x2010, 0x08 /* SIGFPE */  },		/* fp unavailable */  	{ 0x2020, 0x08 /* SIGFPE */  },		/* ap unavailable */  #endif -#else 
/* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */ +#else /* !CONFIG_BOOKE_OR_40x */  	{ 0x0d00, 0x05 /* SIGTRAP */ },		/* single-step */  #if defined(CONFIG_PPC_8xx)  	{ 0x1000, 0x04 /* SIGILL */  },		/* software emulation */ diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 86d77ff056a6..9a492fdec1df 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -124,7 +124,7 @@ int arch_prepare_kprobe(struct kprobe *p)  {  	int ret = 0;  	struct kprobe *prev; -	struct ppc_inst insn = ppc_inst_read(p->addr); +	ppc_inst_t insn = ppc_inst_read(p->addr);  	if ((unsigned long)p->addr & 0x03) {  		printk("Attempt to register kprobe at an unaligned address\n"); @@ -244,7 +244,7 @@ NOKPROBE_SYMBOL(arch_prepare_kretprobe);  static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)  {  	int ret; -	struct ppc_inst insn = ppc_inst_read(p->ainsn.insn); +	ppc_inst_t insn = ppc_inst_read(p->ainsn.insn);  	/* regs->nip is also adjusted if emulate_step returns 1 */  	ret = emulate_step(regs, insn); diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S index 225511d73bef..f2e03ed423d0 100644 --- a/arch/powerpc/kernel/l2cr_6xx.S +++ b/arch/powerpc/kernel/l2cr_6xx.S @@ -96,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR)  	/* Stop DST streams */  BEGIN_FTR_SECTION -	DSSALL +	PPC_DSSALL  	sync  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) @@ -292,7 +292,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)  	isync  	/* Stop DST streams */ -	DSSALL +	PPC_DSSALL  	sync  	/* Get the current enable bit of the L3CR into r4 */ @@ -401,7 +401,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR)  _GLOBAL(__flush_disable_L1)  	/* Stop pending alitvec streams and memory accesses */  BEGIN_FTR_SECTION -	DSSALL +	PPC_DSSALL  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)   	sync diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index fd829f7f25a4..2503dd4713b9 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -586,7 +586,7 @@ void machine_check_print_event_info(struct machine_check_event *evt,  		mc_error_class[evt->error_class] : "Unknown";  	printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype); -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  	/* Display faulty slb contents for SLB errors. */  	if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)  		slb_dump_contents(local_paca->mce_faulty_slbs); diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index c2f55fe7092d..71e8f2a92e36 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -77,15 +77,15 @@ static bool mce_in_guest(void)  }  /* flush SLBs and reload */ -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  void flush_and_reload_slb(void)  { -	/* Invalidate all SLBs */ -	slb_flush_all_realmode(); -  	if (early_radix_enabled())  		return; +	/* Invalidate all SLBs */ +	slb_flush_all_realmode(); +  	/*  	 * This probably shouldn't happen, but it may be possible it's  	 * called in early boot before SLB shadows are allocated. 
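Taken together, the flush_and_reload_slb() hunks above leave the helper looking roughly like this (an illustrative reconstruction for readability, not an additional hunk; the bolted-entry reload that follows the flush is elided): radix boots now return before touching any SLB state, and the helper is only built on hash-MMU kernels.

#ifdef CONFIG_PPC_64S_HASH_MMU
void flush_and_reload_slb(void)
{
	/* Radix does not use the SLB, so there is nothing to invalidate or reload. */
	if (early_radix_enabled())
		return;

	/* Invalidate all SLBs */
	slb_flush_all_realmode();

	/* ... reload of the bolted SLB entries continues unchanged ... */
}
#endif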
@@ -99,7 +99,7 @@ void flush_and_reload_slb(void)  void flush_erat(void)  { -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  	if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {  		flush_and_reload_slb();  		return; @@ -114,7 +114,7 @@ void flush_erat(void)  static int mce_flush(int what)  { -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  	if (what == MCE_FLUSH_SLB) {  		flush_and_reload_slb();  		return 1; @@ -455,7 +455,7 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,  	 * in real-mode is tricky and can lead to recursive  	 * faults  	 */ -	struct ppc_inst instr; +	ppc_inst_t instr;  	unsigned long pfn, instr_addr;  	struct instruction_op op;  	struct pt_regs tmp = *regs; @@ -499,8 +499,10 @@ static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1,  			/* attempt to correct the error */  			switch (table[i].error_type) {  			case MCE_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU  				if (local_paca->in_mce == 1)  					slb_save_contents(local_paca->mce_faulty_slbs); +#endif  				handled = mce_flush(MCE_FLUSH_SLB);  				break;  			case MCE_ERROR_TYPE_ERAT: @@ -588,8 +590,10 @@ static int mce_handle_derror(struct pt_regs *regs,  			/* attempt to correct the error */  			switch (table[i].error_type) {  			case MCE_ERROR_TYPE_SLB: +#ifdef CONFIG_PPC_64S_HASH_MMU  				if (local_paca->in_mce == 1)  					slb_save_contents(local_paca->mce_faulty_slbs); +#endif  				if (mce_flush(MCE_FLUSH_SLB))  					handled = 1;  				break; diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index ed04a3ba66fe..40a583e9d3c7 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -90,16 +90,17 @@ int module_finalize(const Elf_Ehdr *hdr,  }  static __always_inline void * -__module_alloc(unsigned long size, unsigned long start, unsigned long end) +__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn)  {  	pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; +	gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0);  	/*  	 * Don't do huge page allocations for modules yet until more testing  	 * is done. STRICT_MODULE_RWX may require extra work to support this  	 * too.  	 
*/ -	return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, prot, +	return __vmalloc_node_range(size, 1, start, end, gfp, prot,  				    VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,  				    NUMA_NO_NODE, __builtin_return_address(0));  } @@ -114,13 +115,13 @@ void *module_alloc(unsigned long size)  	/* First try within 32M limit from _etext to avoid branch trampolines */  	if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) -		ptr = __module_alloc(size, limit, MODULES_END); +		ptr = __module_alloc(size, limit, MODULES_END, true);  	if (!ptr) -		ptr = __module_alloc(size, MODULES_VADDR, MODULES_END); +		ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false);  	return ptr;  #else -	return __module_alloc(size, VMALLOC_START, VMALLOC_END); +	return __module_alloc(size, VMALLOC_START, VMALLOC_END, false);  #endif  } diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index f417afc08d33..a491ad481d85 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -273,6 +273,31 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,  }  #ifdef CONFIG_DYNAMIC_FTRACE +int module_trampoline_target(struct module *mod, unsigned long addr, +			     unsigned long *target) +{ +	unsigned int jmp[4]; + +	/* Find where the trampoline jumps to */ +	if (copy_from_kernel_nofault(jmp, (void *)addr, sizeof(jmp))) +		return -EFAULT; + +	/* verify that this is what we expect it to be */ +	if ((jmp[0] & 0xffff0000) != PPC_RAW_LIS(_R12, 0) || +	    (jmp[1] & 0xffff0000) != PPC_RAW_ADDI(_R12, _R12, 0) || +	    jmp[2] != PPC_RAW_MTCTR(_R12) || +	    jmp[3] != PPC_RAW_BCTR()) +		return -EINVAL; + +	addr = (jmp[1] & 0xffff) | ((jmp[0] & 0xffff) << 16); +	if (addr & 0x8000) +		addr -= 0x10000; + +	*target = addr; + +	return 0; +} +  int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)  {  	module->arch.tramp = do_plt_call(module->core_layout.base, @@ -281,6 +306,14 @@ int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)  	if (!module->arch.tramp)  		return -ENOENT; +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +	module->arch.tramp_regs = do_plt_call(module->core_layout.base, +					      (unsigned long)ftrace_regs_caller, +					      sechdrs, module); +	if (!module->arch.tramp_regs) +		return -ENOENT; +#endif +  	return 0;  }  #endif diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 3c8d9bbb51cf..0d9f9cd41e13 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -540,7 +540,7 @@ static struct pstore_info nvram_pstore_info = {  	.write = nvram_pstore_write,  }; -static int nvram_pstore_init(void) +static int __init nvram_pstore_init(void)  {  	int rc = 0; @@ -562,7 +562,7 @@ static int nvram_pstore_init(void)  	return rc;  }  #else -static int nvram_pstore_init(void) +static int __init nvram_pstore_init(void)  {  	return -1;  } @@ -755,7 +755,7 @@ static unsigned char __init nvram_checksum(struct nvram_header *p)   * Per the criteria passed via nvram_remove_partition(), should this   * partition be removed?  
1=remove, 0=keep   */ -static int nvram_can_remove_partition(struct nvram_partition *part, +static int __init nvram_can_remove_partition(struct nvram_partition *part,  		const char *name, int sig, const char *exceptions[])  {  	if (part->header.signature != sig) diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index ce1903064031..3b1c2236cbee 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -153,7 +153,7 @@ static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *ad  int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)  { -	struct ppc_inst branch_op_callback, branch_emulate_step, temp; +	ppc_inst_t branch_op_callback, branch_emulate_step, temp;  	unsigned long op_callback_addr, emulate_step_addr;  	kprobe_opcode_t *buff;  	long b_offset; @@ -228,12 +228,8 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)  	/*  	 * 3. load instruction to be emulated into relevant register, and  	 */ -	if (IS_ENABLED(CONFIG_PPC64)) { -		temp = ppc_inst_read(p->ainsn.insn); -		patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); -	} else { -		patch_imm_load_insns((unsigned long)p->ainsn.insn, 4, buff + TMPL_INSN_IDX); -	} +	temp = ppc_inst_read(p->ainsn.insn); +	patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);  	/*  	 * 4. branch back from trampoline @@ -269,7 +265,7 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)  void arch_optimize_kprobes(struct list_head *oplist)  { -	struct ppc_inst instr; +	ppc_inst_t instr;  	struct optimized_kprobe *op;  	struct optimized_kprobe *tmp; diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index 19ea3312403c..5c7f0b4b784b 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -10,8 +10,8 @@  #include <asm/asm-offsets.h>  #ifdef CONFIG_PPC64 -#define SAVE_30GPRS(base) SAVE_10GPRS(2,base); SAVE_10GPRS(12,base); SAVE_10GPRS(22,base) -#define REST_30GPRS(base) REST_10GPRS(2,base); REST_10GPRS(12,base); REST_10GPRS(22,base) +#define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base) +#define REST_30GPRS(base) REST_GPRS(2, 31, base)  #define TEMPLATE_FOR_IMM_LOAD_INSNS	nop; nop; nop; nop; nop  #else  #define SAVE_30GPRS(base) stmw	r2, GPR2(base) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 4208b4044d12..39da688a9455 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -139,8 +139,7 @@ static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)  }  #endif /* CONFIG_PPC_PSERIES */ -#ifdef CONFIG_PPC_BOOK3S_64 - +#ifdef CONFIG_PPC_64S_HASH_MMU  /*   * 3 persistent SLBs are allocated here.  The buffer will be zero   * initially, hence will all be invaild until we actually write them. 
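The module_trampoline_target() helper added in the module_32.c hunk above recovers a trampoline's branch target from its lis/addi immediates; because addi sign-extends its 16-bit operand, the recombined address must be corrected whenever bit 15 of the low half is set. A standalone sketch of just that arithmetic, using made-up instruction words rather than anything taken from the patch:

#include <stdio.h>
#include <stdint.h>

/*
 * Recombine the immediates of "lis r12,hi" / "addi r12,r12,lo" into the
 * absolute address the trampoline loads, mirroring the fixup performed by
 * module_trampoline_target() above.
 */
static uint32_t trampoline_target(uint32_t lis_insn, uint32_t addi_insn)
{
	uint32_t hi = lis_insn & 0xffff;
	uint32_t lo = addi_insn & 0xffff;
	uint32_t addr = (hi << 16) | lo;

	/*
	 * addi sign-extends its immediate, so the lis half was emitted one
	 * too high whenever bit 15 of the low half is set; undo that here.
	 */
	if (lo & 0x8000)
		addr -= 0x10000;

	return addr;
}

int main(void)
{
	/* lis r12,0xc001 ; addi r12,r12,-0x7ffc  ->  target 0xc0008004 */
	printf("0x%08x\n", trampoline_target(0x3d80c001, 0x398c8004));
	return 0;
}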
@@ -169,8 +168,7 @@ static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)  	return s;  } - -#endif /* CONFIG_PPC_BOOK3S_64 */ +#endif /* CONFIG_PPC_64S_HASH_MMU */  #ifdef CONFIG_PPC_PSERIES  /** @@ -226,7 +224,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)  	new_paca->kexec_state = KEXEC_STATE_NONE;  	new_paca->__current = &init_task;  	new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  	new_paca->slb_shadow_ptr = NULL;  #endif @@ -307,7 +305,7 @@ void __init allocate_paca(int cpu)  #ifdef CONFIG_PPC_PSERIES  	paca->lppaca_ptr = new_lppaca(cpu, limit);  #endif -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  	paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);  #endif  #ifdef CONFIG_PPC_PSERIES @@ -328,7 +326,7 @@ void __init free_unused_pacas(void)  	paca_nr_cpu_ids = nr_cpu_ids;  	paca_ptrs_size = new_ptrs_size; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  	if (early_radix_enabled()) {  		/* Ugly fixup, see new_slb_shadow() */  		memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr), @@ -341,9 +339,9 @@ void __init free_unused_pacas(void)  			paca_ptrs_size + paca_struct_size, nr_cpu_ids);  } +#ifdef CONFIG_PPC_64S_HASH_MMU  void copy_mm_to_paca(struct mm_struct *mm)  { -#ifdef CONFIG_PPC_BOOK3S  	mm_context_t *context = &mm->context;  #ifdef CONFIG_PPC_MM_SLICES @@ -356,7 +354,5 @@ void copy_mm_to_paca(struct mm_struct *mm)  	get_paca()->mm_ctx_user_psize = context->user_psize;  	get_paca()->mm_ctx_sllp = context->sllp;  #endif -#else /* !CONFIG_PPC_BOOK3S */ -	return; -#endif  } +#endif /* CONFIG_PPC_64S_HASH_MMU */ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 6749905932f4..8bc9cf62cd93 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL(isa_mem_base);  static const struct dma_map_ops *pci_dma_ops; -void set_pci_dma_ops(const struct dma_map_ops *dma_ops) +void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops)  {  	pci_dma_ops = dma_ops;  } diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index b49e1060a3bf..48537964fba1 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -37,7 +37,7 @@ int pcibios_assign_bus_offset = 1;  EXPORT_SYMBOL(isa_io_base);  EXPORT_SYMBOL(pci_dram_offset); -void pcibios_make_OF_bus_map(void); +void __init pcibios_make_OF_bus_map(void);  static void fixup_cpc710_pci64(struct pci_dev* dev);  static u8* pci_to_OF_bus_map; @@ -109,7 +109,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus)  	}  } -void +void __init  pcibios_make_OF_bus_map(void)  {  	int i; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 406d7ee9e322..984813a4d5dc 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -628,7 +628,7 @@ static void do_break_handler(struct pt_regs *regs)  {  	struct arch_hw_breakpoint null_brk = {0};  	struct arch_hw_breakpoint *info; -	struct ppc_inst instr = ppc_inst(0); +	ppc_inst_t instr = ppc_inst(0);  	int type = 0;  	int size = 0;  	unsigned long ea; @@ -1156,6 +1156,40 @@ static inline void save_sprs(struct thread_struct *t)  #endif  } +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void kvmppc_save_user_regs(void) +{ +	unsigned long usermsr; + +	if (!current->thread.regs) +		return; + +	usermsr = current->thread.regs->msr; + +	if (usermsr & MSR_FP) +		save_fpu(current); + +	if (usermsr & 
MSR_VEC) +		save_altivec(current); + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +	if (usermsr & MSR_TM) { +		current->thread.tm_tfhar = mfspr(SPRN_TFHAR); +		current->thread.tm_tfiar = mfspr(SPRN_TFIAR); +		current->thread.tm_texasr = mfspr(SPRN_TEXASR); +		current->thread.regs->msr &= ~MSR_TM; +	} +#endif +} +EXPORT_SYMBOL_GPL(kvmppc_save_user_regs); + +void kvmppc_save_current_sprs(void) +{ +	save_sprs(&current->thread); +} +EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs); +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ +  static inline void restore_sprs(struct thread_struct *old_thread,  				struct thread_struct *new_thread)  { @@ -1206,7 +1240,7 @@ struct task_struct *__switch_to(struct task_struct *prev,  {  	struct thread_struct *new_thread, *old_thread;  	struct task_struct *last; -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  	struct ppc64_tlb_batch *batch;  #endif @@ -1215,7 +1249,7 @@ struct task_struct *__switch_to(struct task_struct *prev,  	WARN_ON(!irqs_disabled()); -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  	batch = this_cpu_ptr(&ppc64_tlb_batch);  	if (batch->active) {  		current_thread_info()->local_flags |= _TLF_LAZY_MMU; @@ -1281,9 +1315,9 @@ struct task_struct *__switch_to(struct task_struct *prev,  	set_return_regs_changed(); /* _switch changes stack (and regs) */ -#ifdef CONFIG_PPC32 -	kuap_assert_locked(); -#endif +	if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) +		kuap_assert_locked(); +  	last = _switch(old_thread, new_thread);  	/* @@ -1294,6 +1328,7 @@ struct task_struct *__switch_to(struct task_struct *prev,  	 */  #ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  	/*  	 * This applies to a process that was context switched while inside  	 * arch_enter_lazy_mmu_mode(), to re-activate the batch that was @@ -1305,6 +1340,7 @@ struct task_struct *__switch_to(struct task_struct *prev,  		batch = this_cpu_ptr(&ppc64_tlb_batch);  		batch->active = 1;  	} +#endif  	/*  	 * Math facilities are masked out of the child MSR in copy_thread. @@ -1655,7 +1691,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)  static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)  { -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  	unsigned long sp_vsid;  	unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp; @@ -1767,6 +1803,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,  #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)  	p->thread.kuap = KUAP_NONE;  #endif +#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) +	p->thread.pid = MMU_NO_CONTEXT; +#endif  	setup_ksp_vsid(p, sp); @@ -2299,10 +2338,9 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)  	 * the heap, we can put it above 1TB so it is backed by a 1TB  	 * segment. Otherwise the heap will be in the bottom 1TB  	 * which always uses 256MB segments and this may result in a -	 * performance penalty. We don't need to worry about radix. For -	 * radix, mmu_highuser_ssize remains unchanged from 256MB. +	 * performance penalty.  	 
*/ -	if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T)) +	if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))  		base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);  #endif diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 2e67588f6f6e..3d30d40a0e9c 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -231,7 +231,7 @@ static void __init check_cpu_pa_features(unsigned long node)  		      ibm_pa_features, ARRAY_SIZE(ibm_pa_features));  } -#ifdef CONFIG_PPC_BOOK3S_64 +#ifdef CONFIG_PPC_64S_HASH_MMU  static void __init init_mmu_slb_size(unsigned long node)  {  	const __be32 *slb_size_ptr; @@ -402,7 +402,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,  	const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */  	/* Use common scan routine to determine if this is the chosen node */ -	if (early_init_dt_scan_chosen(node, uname, depth, data) == 0) +	if (early_init_dt_scan_chosen(data) < 0)  		return 0;  #ifdef CONFIG_PPC64 @@ -447,7 +447,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,   */  #ifdef CONFIG_SPARSEMEM -static bool validate_mem_limit(u64 base, u64 *size) +static bool __init validate_mem_limit(u64 base, u64 *size)  {  	u64 max_mem = 1UL << (MAX_PHYSMEM_BITS); @@ -458,7 +458,7 @@ static bool validate_mem_limit(u64 base, u64 *size)  	return true;  }  #else -static bool validate_mem_limit(u64 base, u64 *size) +static bool __init validate_mem_limit(u64 base, u64 *size)  {  	return true;  } @@ -532,19 +532,18 @@ static int  __init early_init_drmem_lmb(struct drmem_lmb *lmb,  }  #endif /* CONFIG_PPC_PSERIES */ -static int __init early_init_dt_scan_memory_ppc(unsigned long node, -						const char *uname, -						int depth, void *data) +static int __init early_init_dt_scan_memory_ppc(void)  {  #ifdef CONFIG_PPC_PSERIES -	if (depth == 1 && -	    strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) { +	const void *fdt = initial_boot_params; +	int node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory"); + +	if (node > 0)  		walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb); -		return 0; -	} +  #endif -	 -	return early_init_dt_scan_memory(node, uname, depth, data); + +	return early_init_dt_scan_memory();  }  /* @@ -748,8 +747,8 @@ void __init early_init_devtree(void *params)  	of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line);  	/* Scan memory nodes and rebuild MEMBLOCKs */ -	of_scan_flat_dt(early_init_dt_scan_root, NULL); -	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); +	early_init_dt_scan_root(); +	early_init_dt_scan_memory_ppc();  	parse_early_param(); @@ -857,8 +856,8 @@ void __init early_get_first_memblock_info(void *params, phys_addr_t *size)  	 * mess the memblock.  	 
*/  	add_mem_to_memblock = 0; -	of_scan_flat_dt(early_init_dt_scan_root, NULL); -	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); +	early_init_dt_scan_root(); +	early_init_dt_scan_memory_ppc();  	add_mem_to_memblock = 1;  	if (size) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 18b04b08b983..0ac5faacc909 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -672,7 +672,7 @@ static inline int __init prom_getproplen(phandle node, const char *pname)  	return call_prom("getproplen", 2, 1, node, ADDR(pname));  } -static void add_string(char **str, const char *q) +static void __init add_string(char **str, const char *q)  {  	char *p = *str; @@ -682,7 +682,7 @@ static void add_string(char **str, const char *q)  	*str = p;  } -static char *tohex(unsigned int x) +static char *__init tohex(unsigned int x)  {  	static const char digits[] __initconst = "0123456789abcdef";  	static char result[9] __prombss; @@ -728,7 +728,7 @@ static int __init prom_setprop(phandle node, const char *nodename,  #define prom_islower(c)	('a' <= (c) && (c) <= 'z')  #define prom_toupper(c)	(prom_islower(c) ? ((c) - 'a' + 'A') : (c)) -static unsigned long prom_strtoul(const char *cp, const char **endp) +static unsigned long __init prom_strtoul(const char *cp, const char **endp)  {  	unsigned long result = 0, base = 10, value; @@ -753,7 +753,7 @@ static unsigned long prom_strtoul(const char *cp, const char **endp)  	return result;  } -static unsigned long prom_memparse(const char *ptr, const char **retptr) +static unsigned long __init prom_memparse(const char *ptr, const char **retptr)  {  	unsigned long ret = prom_strtoul(ptr, retptr);  	int shift = 0; @@ -1786,7 +1786,7 @@ static void __init prom_close_stdin(void)  }  #ifdef CONFIG_PPC_SVM -static int prom_rtas_hcall(uint64_t args) +static int __init prom_rtas_hcall(uint64_t args)  {  	register uint64_t arg1 asm("r3") = H_RTAS;  	register uint64_t arg2 asm("r4") = args; @@ -2991,7 +2991,7 @@ static void __init fixup_device_tree_efika_add_phy(void)  	/* Check if the phy-handle property exists - bail if it does */  	rv = prom_getprop(node, "phy-handle", prop, sizeof(prop)); -	if (!rv) +	if (rv <= 0)  		return;  	/* @@ -3248,7 +3248,7 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)  /*   * Perform the Enter Secure Mode ultracall.   */ -static int enter_secure_mode(unsigned long kbase, unsigned long fdt) +static int __init enter_secure_mode(unsigned long kbase, unsigned long fdt)  {  	register unsigned long r3 asm("r3") = UV_ESM;  	register unsigned long r4 asm("r4") = kbase; diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 7c7093c17c45..c43f77e2ac31 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -260,8 +260,7 @@ long do_syscall_trace_enter(struct pt_regs *regs)  {  	u32 flags; -	flags = READ_ONCE(current_thread_info()->flags) & -		(_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE); +	flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);  	if (flags) {  		int rc = tracehook_report_syscall_entry(regs); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index ff80bbad22a5..733e6ef36758 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -492,8 +492,25 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)  }  EXPORT_SYMBOL(rtas_call); -/* For RTAS_BUSY (-2), delay for 1 millisecond.  
For an extended busy status - * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds. +/** + * rtas_busy_delay_time() - From an RTAS status value, calculate the + *                          suggested delay time in milliseconds. + * + * @status: a value returned from rtas_call() or similar APIs which return + *          the status of a RTAS function call. + * + * Context: Any context. + * + * Return: + * * 100000 - If @status is 9905. + * * 10000  - If @status is 9904. + * * 1000   - If @status is 9903. + * * 100    - If @status is 9902. + * * 10     - If @status is 9901. + * * 1      - If @status is either 9900 or -2. This is "wrong" for -2, but + *            some callers depend on this behavior, and the worst outcome + *            is that they will delay for longer than necessary. + * * 0      - If @status is not a busy or extended delay value.   */  unsigned int rtas_busy_delay_time(int status)  { @@ -513,17 +530,77 @@ unsigned int rtas_busy_delay_time(int status)  }  EXPORT_SYMBOL(rtas_busy_delay_time); -/* For an RTAS busy status code, perform the hinted delay. */ -unsigned int rtas_busy_delay(int status) +/** + * rtas_busy_delay() - helper for RTAS busy and extended delay statuses + * + * @status: a value returned from rtas_call() or similar APIs which return + *          the status of a RTAS function call. + * + * Context: Process context. May sleep or schedule. + * + * Return: + * * true  - @status is RTAS_BUSY or an extended delay hint. The + *           caller may assume that the CPU has been yielded if necessary, + *           and that an appropriate delay for @status has elapsed. + *           Generally the caller should reattempt the RTAS call which + *           yielded @status. + * + * * false - @status is not @RTAS_BUSY nor an extended delay hint. The + *           caller is responsible for handling @status. + */ +bool rtas_busy_delay(int status)  {  	unsigned int ms; +	bool ret; -	might_sleep(); -	ms = rtas_busy_delay_time(status); -	if (ms && need_resched()) -		msleep(ms); +	switch (status) { +	case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: +		ret = true; +		ms = rtas_busy_delay_time(status); +		/* +		 * The extended delay hint can be as high as 100 seconds. +		 * Surely any function returning such a status is either +		 * buggy or isn't going to be significantly slowed by us +		 * polling at 1HZ. Clamp the sleep time to one second. +		 */ +		ms = clamp(ms, 1U, 1000U); +		/* +		 * The delay hint is an order-of-magnitude suggestion, not +		 * a minimum. It is fine, possibly even advantageous, for +		 * us to pause for less time than hinted. For small values, +		 * use usleep_range() to ensure we don't sleep much longer +		 * than actually needed. +		 * +		 * See Documentation/timers/timers-howto.rst for +		 * explanation of the threshold used here. In effect we use +		 * usleep_range() for 9900 and 9901, msleep() for +		 * 9902-9905. +		 */ +		if (ms <= 20) +			usleep_range(ms * 100, ms * 1000); +		else +			msleep(ms); +		break; +	case RTAS_BUSY: +		ret = true; +		/* +		 * We should call again immediately if there's no other +		 * work to do. +		 */ +		cond_resched(); +		break; +	default: +		ret = false; +		/* +		 * Not a busy or extended delay status; the caller should +		 * handle @status itself. Ensure we warn on misuses in +		 * atomic context regardless. 
+		 */ +		might_sleep(); +		break; +	} -	return ms; +	return ret;  }  EXPORT_SYMBOL(rtas_busy_delay); @@ -809,13 +886,13 @@ void rtas_os_term(char *str)  /**   * rtas_activate_firmware() - Activate a new version of firmware.   * + * Context: This function may sleep. + *   * Activate a new version of partition firmware. The OS must call this   * after resuming from a partition hibernation or migration in order   * to maintain the ability to perform live firmware updates. It's not   * catastrophic for this method to be absent or to fail; just log the   * condition in that case. - * - * Context: This function may sleep.   */  void rtas_activate_firmware(void)  { @@ -890,11 +967,12 @@ int rtas_call_reentrant(int token, int nargs, int nret, int *outputs, ...)  #endif /* CONFIG_PPC_PSERIES */  /** - * Find a specific pseries error log in an RTAS extended event log. + * get_pseries_errorlog() - Find a specific pseries error log in an RTAS + *                          extended event log.   * @log: RTAS error/event log   * @section_id: two character section identifier   * - * Returns a pointer to the specified errorlog or NULL if not found. + * Return: A pointer to the specified errorlog or NULL if not found.   */  struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,  					      uint16_t section_id) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 32ee17753eb4..cf0f42909ddf 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -455,7 +455,7 @@ static void rtas_event_scan(struct work_struct *w)  }  #ifdef CONFIG_PPC64 -static void retrieve_nvram_error_log(void) +static void __init retrieve_nvram_error_log(void)  {  	unsigned int err_type ;  	int rc ; @@ -473,12 +473,12 @@ static void retrieve_nvram_error_log(void)  	}  }  #else /* CONFIG_PPC64 */ -static void retrieve_nvram_error_log(void) +static void __init retrieve_nvram_error_log(void)  {  }  #endif /* CONFIG_PPC64 */ -static void start_event_scan(void) +static void __init start_event_scan(void)  {  	printk(KERN_DEBUG "RTAS daemon started\n");  	pr_debug("rtasd: will sleep for %d milliseconds\n", diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 15fb5ea1b9ea..e159d4093d98 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -44,7 +44,7 @@ static void enable_barrier_nospec(bool enable)  	do_barrier_nospec_fixups(enable);  } -void setup_barrier_nospec(void) +void __init setup_barrier_nospec(void)  {  	bool enable; @@ -132,7 +132,7 @@ early_param("nospectre_v2", handle_nospectre_v2);  #endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */  #ifdef CONFIG_PPC_FSL_BOOK3E -void setup_spectre_v2(void) +void __init setup_spectre_v2(void)  {  	if (no_spectrev2 || cpu_mitigations_off())  		do_btb_flush_fixups(); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4f1322b65760..f8da937df918 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -582,7 +582,7 @@ static __init int add_pcspkr(void)  device_initcall(add_pcspkr);  #endif	/* CONFIG_PCSPKR_PLATFORM */ -void probe_machine(void) +static __init void probe_machine(void)  {  	extern struct machdep_calls __machine_desc_start;  	extern struct machdep_calls __machine_desc_end; diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 84058bbc8fe9..93f22da12abe 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -29,7 +29,7 @@ void 
setup_tlb_core_data(void);  static inline void setup_tlb_core_data(void) { }  #endif -#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x  void exc_lvl_early_init(void);  #else  static inline void exc_lvl_early_init(void) { } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 7ec5c47fce0e..a6e9d36d7c01 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -75,7 +75,7 @@ EXPORT_SYMBOL(DMA_MODE_WRITE);  notrace void __init machine_init(u64 dt_ptr)  {  	u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache); -	struct ppc_inst insn; +	ppc_inst_t insn;  	/* Configure static keys first, now that we're relocated. */  	setup_feature_keys(); @@ -175,7 +175,7 @@ void __init emergency_stack_init(void)  }  #endif -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x  void __init exc_lvl_early_init(void)  {  	unsigned int i, hw_cpu; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6052f5d5ded3..be8577ac9397 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -499,7 +499,7 @@ void smp_release_cpus(void)   * routines and/or provided to userland   */ -static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, +static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,  			    u32 bsize, u32 sets)  {  	info->size = size; @@ -771,50 +771,6 @@ void __init emergency_stack_init(void)  }  #ifdef CONFIG_SMP -/** - * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu - * @cpu: cpu to allocate for - * @size: size allocation in bytes - * @align: alignment - * - * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper - * does the right thing for NUMA regardless of the current - * configuration. - * - * RETURNS: - * Pointer to the allocated area on success, NULL on failure. 
- */ -static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size, -					size_t align) -{ -	const unsigned long goal = __pa(MAX_DMA_ADDRESS); -#ifdef CONFIG_NUMA -	int node = early_cpu_to_node(cpu); -	void *ptr; - -	if (!node_online(node) || !NODE_DATA(node)) { -		ptr = memblock_alloc_from(size, align, goal); -		pr_info("cpu %d has no node %d or node-local memory\n", -			cpu, node); -		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", -			 cpu, size, __pa(ptr)); -	} else { -		ptr = memblock_alloc_try_nid(size, align, goal, -					     MEMBLOCK_ALLOC_ACCESSIBLE, node); -		pr_debug("per cpu data for cpu%d %lu bytes on node%d at " -			 "%016lx\n", cpu, size, node, __pa(ptr)); -	} -	return ptr; -#else -	return memblock_alloc_from(size, align, goal); -#endif -} - -static void __init pcpu_free_bootmem(void *ptr, size_t size) -{ -	memblock_free(ptr, size); -} -  static int pcpu_cpu_distance(unsigned int from, unsigned int to)  {  	if (early_cpu_to_node(from) == early_cpu_to_node(to)) @@ -823,53 +779,13 @@ static int pcpu_cpu_distance(unsigned int from, unsigned int to)  		return REMOTE_DISTANCE;  } -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; -EXPORT_SYMBOL(__per_cpu_offset); - -static void __init pcpu_populate_pte(unsigned long addr) +static __init int pcpu_cpu_to_node(int cpu)  { -	pgd_t *pgd = pgd_offset_k(addr); -	p4d_t *p4d; -	pud_t *pud; -	pmd_t *pmd; - -	p4d = p4d_offset(pgd, addr); -	if (p4d_none(*p4d)) { -		pud_t *new; - -		new = memblock_alloc(PUD_TABLE_SIZE, PUD_TABLE_SIZE); -		if (!new) -			goto err_alloc; -		p4d_populate(&init_mm, p4d, new); -	} - -	pud = pud_offset(p4d, addr); -	if (pud_none(*pud)) { -		pmd_t *new; - -		new = memblock_alloc(PMD_TABLE_SIZE, PMD_TABLE_SIZE); -		if (!new) -			goto err_alloc; -		pud_populate(&init_mm, pud, new); -	} - -	pmd = pmd_offset(pud, addr); -	if (!pmd_present(*pmd)) { -		pte_t *new; - -		new = memblock_alloc(PTE_TABLE_SIZE, PTE_TABLE_SIZE); -		if (!new) -			goto err_alloc; -		pmd_populate_kernel(&init_mm, pmd, new); -	} - -	return; - -err_alloc: -	panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", -	      __func__, PAGE_SIZE, PAGE_SIZE, PAGE_SIZE); +	return early_cpu_to_node(cpu);  } +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset);  void __init setup_per_cpu_areas(void)  { @@ -880,18 +796,27 @@ void __init setup_per_cpu_areas(void)  	int rc = -EINVAL;  	/* -	 * Linear mapping is one of 4K, 1M and 16M.  For 4K, no need -	 * to group units.  For larger mappings, use 1M atom which -	 * should be large enough to contain a number of units. +	 * BookE and BookS radix are historical values and should be revisited.  	 */ -	if (mmu_linear_psize == MMU_PAGE_4K) +	if (IS_ENABLED(CONFIG_PPC_BOOK3E)) { +		atom_size = SZ_1M; +	} else if (radix_enabled()) {  		atom_size = PAGE_SIZE; -	else -		atom_size = 1 << 20; +	} else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) { +		/* +		 * Linear mapping is one of 4K, 1M and 16M.  For 4K, no need +		 * to group units.  For larger mappings, use 1M atom which +		 * should be large enough to contain a number of units. 
+		 */ +		if (mmu_linear_psize == MMU_PAGE_4K) +			atom_size = PAGE_SIZE; +		else +			atom_size = SZ_1M; +	}  	if (pcpu_chosen_fc != PCPU_FC_PAGE) {  		rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance, -					    pcpu_alloc_bootmem, pcpu_free_bootmem); +					    pcpu_cpu_to_node);  		if (rc)  			pr_warn("PERCPU: %s allocator failed (%d), "  				"falling back to page size\n", @@ -899,8 +824,7 @@ void __init setup_per_cpu_areas(void)  	}  	if (rc < 0) -		rc = pcpu_page_first_chunk(0, pcpu_alloc_bootmem, pcpu_free_bootmem, -					   pcpu_populate_pte); +		rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);  	if (rc < 0)  		panic("cannot initialize percpu area (err=%d)", rc); diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 3e053e2fd6b6..d84c434b2b78 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -527,16 +527,20 @@ static long restore_user_regs(struct pt_regs *regs,  	regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));  #ifdef CONFIG_SPE -	/* force the process to reload the spe registers from -	   current->thread when it next does spe instructions */ +	/* +	 * Force the process to reload the spe registers from +	 * current->thread when it next does spe instructions. +	 * Since this is user ABI, we must enforce the sizing. +	 */ +	BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32));  	regs_set_return_msr(regs, regs->msr & ~MSR_SPE);  	if (msr & MSR_SPE) {  		/* restore spe registers from the stack */ -		unsafe_copy_from_user(current->thread.evr, &sr->mc_vregs, -				      ELF_NEVRREG * sizeof(u32), failed); +		unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs, +				      sizeof(current->thread.spe), failed);  		current->thread.used_spe = true;  	} else if (current->thread.used_spe) -		memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32)); +		memset(&current->thread.spe, 0, sizeof(current->thread.spe));  	/* Always get SPEFSCR back */  	unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index c23ee842c4c3..b7fd6a72aa76 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -61,6 +61,7 @@  #include <asm/cpu_has_feature.h>  #include <asm/ftrace.h>  #include <asm/kup.h> +#include <asm/fadump.h>  #ifdef DEBUG  #include <asm/udbg.h> @@ -621,6 +622,45 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))  #endif  #ifdef CONFIG_NMI_IPI +static void crash_stop_this_cpu(struct pt_regs *regs) +#else +static void crash_stop_this_cpu(void *dummy) +#endif +{ +	/* +	 * Just busy wait here and avoid marking CPU as offline to ensure +	 * register data is captured appropriately. +	 */ +	while (1) +		cpu_relax(); +} + +void crash_smp_send_stop(void) +{ +	static bool stopped = false; + +	/* +	 * In case of fadump, register data for all CPUs is captured by f/w +	 * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before +	 * this rtas call to avoid tricky post processing of those CPUs' +	 * backtraces. 
+	 */ +	if (should_fadump_crash()) +		return; + +	if (stopped) +		return; + +	stopped = true; + +#ifdef CONFIG_NMI_IPI +	smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); +#else +	smp_call_function(crash_stop_this_cpu, NULL, 0); +#endif /* CONFIG_NMI_IPI */ +} + +#ifdef CONFIG_NMI_IPI  static void nmi_stop_this_cpu(struct pt_regs *regs)  {  	/* @@ -896,7 +936,8 @@ out:  	return tg;  } -static int update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, int cpu, int cpu_group_start) +static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg, +					       int cpu, int cpu_group_start)  {  	int first_thread = cpu_first_thread_sibling(cpu);  	int i; @@ -1635,12 +1676,14 @@ void start_secondary(void *unused)  	BUG();  } +#ifdef CONFIG_PROFILING  int setup_profiling_timer(unsigned int multiplier)  {  	return 0;  } +#endif -static void fixup_topology(void) +static void __init fixup_topology(void)  {  	int i; diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S index f73f4d72fea4..e0cbd63007f2 100644 --- a/arch/powerpc/kernel/swsusp_32.S +++ b/arch/powerpc/kernel/swsusp_32.S @@ -181,7 +181,7 @@ _GLOBAL(swsusp_arch_resume)  #ifdef CONFIG_ALTIVEC  	/* Stop pending alitvec streams and memory accesses */  BEGIN_FTR_SECTION -	DSSALL +	PPC_DSSALL  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  #endif   	sync diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 96bb20715aa9..9f1903c7f540 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -141,7 +141,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)  _GLOBAL(swsusp_arch_resume)  	/* Stop pending alitvec streams and memory accesses */  BEGIN_FTR_SECTION -	DSSALL +	PPC_DSSALL  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  	sync diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 15109af9d075..2600b4237292 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -529,3 +529,4 @@  # 447 reserved for memfd_secret  448	common	process_mrelease		sys_process_mrelease  449	common  futex_waitv                     sys_futex_waitv +450 	nospu	set_mempolicy_home_node		sys_set_mempolicy_home_node diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 08d8072d6e7a..d45a415d5374 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -214,7 +214,7 @@ static ssize_t __used store_dscr_default(struct device *dev,  static DEVICE_ATTR(dscr_default, 0600,  		show_dscr_default, store_dscr_default); -static void sysfs_create_dscr_default(void) +static void __init sysfs_create_dscr_default(void)  {  	if (cpu_has_feature(CPU_FTR_DSCR)) {  		int cpu; @@ -744,12 +744,12 @@ static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char  }  static DEVICE_ATTR(svm, 0444, show_svm, NULL); -static void create_svm_file(void) +static void __init create_svm_file(void)  {  	device_create_file(cpu_subsys.dev_root, &dev_attr_svm);  }  #else -static void create_svm_file(void) +static void __init create_svm_file(void)  {  }  #endif /* CONFIG_PPC_SVM */ @@ -1110,7 +1110,7 @@ EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group);  /* NUMA stuff */  #ifdef CONFIG_NUMA -static void register_nodes(void) +static void __init register_nodes(void)  {  	int i; @@ -1134,7 +1134,7 @@ void sysfs_remove_device_from_node(struct device *dev, int nid)  EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);  #else 
-static void register_nodes(void) +static void __init register_nodes(void)  {  	return;  } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index cae8f03a44fe..62361cc7281c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -88,6 +88,7 @@ static struct clocksource clocksource_timebase = {  #define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF  u64 decrementer_max = DECREMENTER_DEFAULT_MAX; +EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */  static int decrementer_set_next_event(unsigned long evt,  				      struct clock_event_device *dev); @@ -107,6 +108,7 @@ struct clock_event_device decrementer_clockevent = {  EXPORT_SYMBOL(decrementer_clockevent);  DEFINE_PER_CPU(u64, decrementers_next_tb); +EXPORT_SYMBOL_GPL(decrementers_next_tb);  static DEFINE_PER_CPU(struct clock_event_device, decrementers);  #define XSEC_PER_SEC (1024*1024) @@ -496,6 +498,16 @@ EXPORT_SYMBOL(profile_pc);   * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...   */  #ifdef CONFIG_PPC64 +static inline unsigned long test_irq_work_pending(void) +{ +	unsigned long x; + +	asm volatile("lbz %0,%1(13)" +		: "=r" (x) +		: "i" (offsetof(struct paca_struct, irq_work_pending))); +	return x; +} +  static inline void set_irq_work_pending_flag(void)  {  	asm volatile("stb %0,%1(13)" : : @@ -539,13 +551,44 @@ void arch_irq_work_raise(void)  	preempt_enable();  } +static void set_dec_or_work(u64 val) +{ +	set_dec(val); +	/* We may have raced with new irq work */ +	if (unlikely(test_irq_work_pending())) +		set_dec(1); +} +  #else  /* CONFIG_IRQ_WORK */  #define test_irq_work_pending()	0  #define clear_irq_work_pending() +static void set_dec_or_work(u64 val) +{ +	set_dec(val); +}  #endif /* CONFIG_IRQ_WORK */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +void timer_rearm_host_dec(u64 now) +{ +	u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); + +	WARN_ON_ONCE(!arch_irqs_disabled()); +	WARN_ON_ONCE(mfmsr() & MSR_EE); + +	if (now >= *next_tb) { +		local_paca->irq_happened |= PACA_IRQ_DEC; +	} else { +		now = *next_tb - now; +		if (now <= decrementer_max) +			set_dec_or_work(now); +	} +} +EXPORT_SYMBOL_GPL(timer_rearm_host_dec); +#endif +  /*   * timer_interrupt - gets called when the decrementer overflows,   * with interrupts disabled. @@ -566,22 +609,23 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)  		return;  	} -	/* Ensure a positive value is written to the decrementer, or else -	 * some CPUs will continue to take decrementer exceptions. When the -	 * PPC_WATCHDOG (decrementer based) is configured, keep this at most -	 * 31 bits, which is about 4 seconds on most systems, which gives -	 * the watchdog a chance of catching timer interrupt hard lockups. -	 */ -	if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) -		set_dec(0x7fffffff); -	else -		set_dec(decrementer_max); - -	/* Conditionally hard-enable interrupts now that the DEC has been -	 * bumped to its maximum value -	 */ -	may_hard_irq_enable(); +	/* Conditionally hard-enable interrupts. */ +	if (should_hard_irq_enable()) { +		/* +		 * Ensure a positive value is written to the decrementer, or +		 * else some CPUs will continue to take decrementer exceptions. +		 * When the PPC_WATCHDOG (decrementer based) is configured, +		 * keep this at most 31 bits, which is about 4 seconds on most +		 * systems, which gives the watchdog a chance of catching timer +		 * interrupt hard lockups. 
+		 */ +		if (IS_ENABLED(CONFIG_PPC_WATCHDOG)) +			set_dec(0x7fffffff); +		else +			set_dec(decrementer_max); +		do_hard_irq_enable(); +	}  #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)  	if (atomic_read(&ppc_n_lost_interrupts) != 0) @@ -606,10 +650,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)  	} else {  		now = *next_tb - now;  		if (now <= decrementer_max) -			set_dec(now); -		/* We may have raced with new irq work */ -		if (test_irq_work_pending()) -			set_dec(1); +			set_dec_or_work(now);  		__this_cpu_inc(irq_stat.timer_irqs_others);  	} @@ -730,7 +771,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val)  static void start_cpu_decrementer(void)  { -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_OR_40x  	unsigned int tcr;  	/* Clear any pending timer interrupts */ @@ -843,11 +884,7 @@ static int decrementer_set_next_event(unsigned long evt,  				      struct clock_event_device *dev)  {  	__this_cpu_write(decrementers_next_tb, get_tb() + evt); -	set_dec(evt); - -	/* We may have raced with new irq work */ -	if (test_irq_work_pending()) -		set_dec(1); +	set_dec_or_work(evt);  	return 0;  } diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 2b91f233b05d..3beecc32940b 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -226,11 +226,8 @@ _GLOBAL(tm_reclaim)  	/* Sync the userland GPRs 2-12, 14-31 to thread->regs: */  	SAVE_GPR(0, r7)				/* user r0 */ -	SAVE_GPR(2, r7)				/* user r2 */ -	SAVE_4GPRS(3, r7)			/* user r3-r6 */ -	SAVE_GPR(8, r7)				/* user r8 */ -	SAVE_GPR(9, r7)				/* user r9 */ -	SAVE_GPR(10, r7)			/* user r10 */ +	SAVE_GPRS(2, 6, r7)			/* user r2-r6 */ +	SAVE_GPRS(8, 10, r7)			/* user r8-r10 */  	ld	r3, GPR1(r1)			/* user r1 */  	ld	r4, GPR7(r1)			/* user r7 */  	ld	r5, GPR11(r1)			/* user r11 */ @@ -445,12 +442,8 @@ restore_gprs:  	ld	r6, THREAD_TM_PPR(r3)  	REST_GPR(0, r7)				/* GPR0 */ -	REST_2GPRS(2, r7)			/* GPR2-3 */ -	REST_GPR(4, r7)				/* GPR4 */ -	REST_4GPRS(8, r7)			/* GPR8-11 */ -	REST_2GPRS(12, r7)			/* GPR12-13 */ - -	REST_NVGPRS(r7)				/* GPR14-31 */ +	REST_GPRS(2, 4, r7)			/* GPR2-4 */ +	REST_GPRS(8, 31, r7)			/* GPR8-31 */  	/* Load up PPR and DSCR here so we don't run with user values for long */  	mtspr	SPRN_DSCR, r5 diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d89c5df4f206..80b6285769f2 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -41,10 +41,10 @@  #define	NUM_FTRACE_TRAMPS	8  static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; -static struct ppc_inst +static ppc_inst_t  ftrace_call_replace(unsigned long ip, unsigned long addr, int link)  { -	struct ppc_inst op; +	ppc_inst_t op;  	addr = ppc_function_entry((void *)addr); @@ -55,9 +55,9 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link)  }  static int -ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new) +ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)  { -	struct ppc_inst replaced; +	ppc_inst_t replaced;  	/*  	 * Note: @@ -90,24 +90,24 @@ ftrace_modify_code(unsigned long ip, struct ppc_inst old, struct ppc_inst new)   */  static int test_24bit_addr(unsigned long ip, unsigned long addr)  { -	struct ppc_inst op; +	ppc_inst_t op;  	addr = ppc_function_entry((void *)addr);  	/* use the create_branch to verify that this offset can be branched */  	return create_branch(&op, (u32 *)ip, addr, 0) == 0;  } -static int is_bl_op(struct ppc_inst op) +static 
int is_bl_op(ppc_inst_t op)  {  	return (ppc_inst_val(op) & 0xfc000003) == 0x48000001;  } -static int is_b_op(struct ppc_inst op) +static int is_b_op(ppc_inst_t op)  {  	return (ppc_inst_val(op) & 0xfc000003) == 0x48000000;  } -static unsigned long find_bl_target(unsigned long ip, struct ppc_inst op) +static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op)  {  	int offset; @@ -127,7 +127,7 @@ __ftrace_make_nop(struct module *mod,  {  	unsigned long entry, ptr, tramp;  	unsigned long ip = rec->ip; -	struct ppc_inst op, pop; +	ppc_inst_t op, pop;  	/* read where this goes */  	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { @@ -221,10 +221,9 @@ static int  __ftrace_make_nop(struct module *mod,  		  struct dyn_ftrace *rec, unsigned long addr)  { -	struct ppc_inst op; -	unsigned int jmp[4]; +	ppc_inst_t op;  	unsigned long ip = rec->ip; -	unsigned long tramp; +	unsigned long tramp, ptr;  	if (copy_from_kernel_nofault(&op, (void *)ip, MCOUNT_INSN_SIZE))  		return -EFAULT; @@ -238,41 +237,13 @@ __ftrace_make_nop(struct module *mod,  	/* lets find where the pointer goes */  	tramp = find_bl_target(ip, op); -	/* -	 * On PPC32 the trampoline looks like: -	 *  0x3d, 0x80, 0x00, 0x00  lis r12,sym@ha -	 *  0x39, 0x8c, 0x00, 0x00  addi r12,r12,sym@l -	 *  0x7d, 0x89, 0x03, 0xa6  mtctr r12 -	 *  0x4e, 0x80, 0x04, 0x20  bctr -	 */ - -	pr_devel("ip:%lx jumps to %lx", ip, tramp); -  	/* Find where the trampoline jumps to */ -	if (copy_from_kernel_nofault(jmp, (void *)tramp, sizeof(jmp))) { -		pr_err("Failed to read %lx\n", tramp); +	if (module_trampoline_target(mod, tramp, &ptr)) { +		pr_err("Failed to get trampoline target\n");  		return -EFAULT;  	} -	pr_devel(" %08x %08x ", jmp[0], jmp[1]); - -	/* verify that this is what we expect it to be */ -	if (((jmp[0] & 0xffff0000) != 0x3d800000) || -	    ((jmp[1] & 0xffff0000) != 0x398c0000) || -	    (jmp[2] != 0x7d8903a6) || -	    (jmp[3] != 0x4e800420)) { -		pr_err("Not a trampoline\n"); -		return -EINVAL; -	} - -	tramp = (jmp[1] & 0xffff) | -		((jmp[0] & 0xffff) << 16); -	if (tramp & 0x8000) -		tramp -= 0x10000; - -	pr_devel(" %lx ", tramp); - -	if (tramp != addr) { +	if (ptr != addr) {  		pr_err("Trampoline location %08lx does not match addr\n",  		       tramp);  		return -EINVAL; @@ -291,7 +262,7 @@ __ftrace_make_nop(struct module *mod,  static unsigned long find_ftrace_tramp(unsigned long ip)  {  	int i; -	struct ppc_inst instr; +	ppc_inst_t instr;  	/*  	 * We have the compiler generated long_branch tramps at the end @@ -329,9 +300,9 @@ static int add_ftrace_tramp(unsigned long tramp)  static int setup_mcount_compiler_tramp(unsigned long tramp)  {  	int i; -	struct ppc_inst op; +	ppc_inst_t op;  	unsigned long ptr; -	struct ppc_inst instr; +	ppc_inst_t instr;  	static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];  	/* Is this a known long jump tramp? 
*/ @@ -396,7 +367,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp)  static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)  {  	unsigned long tramp, ip = rec->ip; -	struct ppc_inst op; +	ppc_inst_t op;  	/* Read where this goes */  	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { @@ -436,7 +407,7 @@ int ftrace_make_nop(struct module *mod,  		    struct dyn_ftrace *rec, unsigned long addr)  {  	unsigned long ip = rec->ip; -	struct ppc_inst old, new; +	ppc_inst_t old, new;  	/*  	 * If the calling address is more that 24 bits away, @@ -489,7 +460,7 @@ int ftrace_make_nop(struct module *mod,   */  #ifndef CONFIG_MPROFILE_KERNEL  static int -expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) +expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)  {  	/*  	 * We expect to see: @@ -507,7 +478,7 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)  }  #else  static int -expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1) +expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)  {  	/* look for patched "NOP" on ppc64 with -mprofile-kernel */  	if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()))) @@ -519,8 +490,8 @@ expected_nop_sequence(void *ip, struct ppc_inst op0, struct ppc_inst op1)  static int  __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)  { -	struct ppc_inst op[2]; -	struct ppc_inst instr; +	ppc_inst_t op[2]; +	ppc_inst_t instr;  	void *ip = (void *)rec->ip;  	unsigned long entry, ptr, tramp;  	struct module *mod = rec->arch.mod; @@ -588,8 +559,10 @@ static int  __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)  {  	int err; -	struct ppc_inst op; +	ppc_inst_t op;  	u32 *ip = (u32 *)rec->ip; +	struct module *mod = rec->arch.mod; +	unsigned long tramp;  	/* read where this goes */  	if (copy_inst_from_kernel_nofault(&op, ip)) @@ -602,13 +575,23 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)  	}  	/* If we never set up a trampoline to ftrace_caller, then bail */ -	if (!rec->arch.mod->arch.tramp) { +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +	if (!mod->arch.tramp || !mod->arch.tramp_regs) { +#else +	if (!mod->arch.tramp) { +#endif  		pr_err("No ftrace trampoline\n");  		return -EINVAL;  	} +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +	if (rec->flags & FTRACE_FL_REGS) +		tramp = mod->arch.tramp_regs; +	else +#endif +		tramp = mod->arch.tramp;  	/* create the branch to the trampoline */ -	err = create_branch(&op, ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK); +	err = create_branch(&op, ip, tramp, BRANCH_SET_LINK);  	if (err) {  		pr_err("REL24 out of range!\n");  		return -EINVAL; @@ -626,7 +609,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)  static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)  { -	struct ppc_inst op; +	ppc_inst_t op;  	void *ip = (void *)rec->ip;  	unsigned long tramp, entry, ptr; @@ -674,7 +657,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)  int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)  {  	unsigned long ip = rec->ip; -	struct ppc_inst old, new; +	ppc_inst_t old, new;  	/*  	 * If the calling address is more that 24 bits away, @@ -713,7 +696,7 @@ static int  __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,  					unsigned long addr)  { -	struct ppc_inst op; +	ppc_inst_t op;  	unsigned long ip = rec->ip;  	unsigned long entry, ptr, tramp;  	struct module *mod = rec->arch.mod; @@ 
-807,7 +790,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,  			unsigned long addr)  {  	unsigned long ip = rec->ip; -	struct ppc_inst old, new; +	ppc_inst_t old, new;  	/*  	 * If the calling address is more that 24 bits away, @@ -847,7 +830,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,  int ftrace_update_ftrace_func(ftrace_func_t func)  {  	unsigned long ip = (unsigned long)(&ftrace_call); -	struct ppc_inst old, new; +	ppc_inst_t old, new;  	int ret;  	old = ppc_inst_read((u32 *)&ftrace_call); @@ -932,7 +915,7 @@ int ftrace_enable_ftrace_graph_caller(void)  	unsigned long ip = (unsigned long)(&ftrace_graph_call);  	unsigned long addr = (unsigned long)(&ftrace_graph_caller);  	unsigned long stub = (unsigned long)(&ftrace_graph_stub); -	struct ppc_inst old, new; +	ppc_inst_t old, new;  	old = ftrace_call_replace(ip, stub, 0);  	new = ftrace_call_replace(ip, addr, 0); @@ -945,7 +928,7 @@ int ftrace_disable_ftrace_graph_caller(void)  	unsigned long ip = (unsigned long)(&ftrace_graph_call);  	unsigned long addr = (unsigned long)(&ftrace_graph_caller);  	unsigned long stub = (unsigned long)(&ftrace_graph_stub); -	struct ppc_inst old, new; +	ppc_inst_t old, new;  	old = ftrace_call_replace(ip, addr, 0);  	new = ftrace_call_replace(ip, stub, 0); diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index e023ae59c429..0a02c0cb12d9 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -9,55 +9,135 @@  #include <asm/asm-offsets.h>  #include <asm/ftrace.h>  #include <asm/export.h> +#include <asm/ptrace.h>  _GLOBAL(mcount)  _GLOBAL(_mcount)  	/*  	 * It is required that _mcount on PPC32 must preserve the -	 * link register. But we have r0 to play with. We use r0 +	 * link register. But we have r12 to play with. We use r12  	 * to push the return address back to the caller of mcount  	 * into the ctr register, restore the link register and  	 * then jump back using the ctr register.  	 
*/ -	mflr	r0 -	mtctr	r0 -	lwz	r0, 4(r1) +	mflr	r12 +	mtctr	r12  	mtlr	r0  	bctr +EXPORT_SYMBOL(_mcount)  _GLOBAL(ftrace_caller)  	MCOUNT_SAVE_FRAME  	/* r3 ends up with link register */  	subi	r3, r3, MCOUNT_INSN_SIZE +	lis	r5,function_trace_op@ha +	lwz	r5,function_trace_op@l(r5) +	li	r6, 0  .globl ftrace_call  ftrace_call:  	bl	ftrace_stub  	nop +	MCOUNT_RESTORE_FRAME +ftrace_caller_common:  #ifdef CONFIG_FUNCTION_GRAPH_TRACER  .globl ftrace_graph_call  ftrace_graph_call:  	b	ftrace_graph_stub  _GLOBAL(ftrace_graph_stub)  #endif -	MCOUNT_RESTORE_FRAME  	/* old link register ends up in ctr reg */  	bctr -EXPORT_SYMBOL(_mcount)  _GLOBAL(ftrace_stub)  	blr +_GLOBAL(ftrace_regs_caller) +	/* Save the original return address in A's stack frame */ +	stw	r0,LRSAVE(r1) + +	/* Create our stack frame + pt_regs */ +	stwu	r1,-INT_FRAME_SIZE(r1) + +	/* Save all gprs to pt_regs */ +	stw	r0, GPR0(r1) +	stmw	r2, GPR2(r1) + +	/* Save previous stack pointer (r1) */ +	addi	r8, r1, INT_FRAME_SIZE +	stw	r8, GPR1(r1) + +	/* Load special regs for save below */ +	mfmsr   r8 +	mfctr   r9 +	mfxer   r10 +	mfcr	r11 + +	/* Get the _mcount() call site out of LR */ +	mflr	r7 +	/* Save it as pt_regs->nip */ +	stw     r7, _NIP(r1) +	/* Save the read LR in pt_regs->link */ +	stw     r0, _LINK(r1) + +	lis	r3,function_trace_op@ha +	lwz	r5,function_trace_op@l(r3) + +	/* Calculate ip from nip-4 into r3 for call below */ +	subi    r3, r7, MCOUNT_INSN_SIZE + +	/* Put the original return address in r4 as parent_ip */ +	mr	r4, r0 + +	/* Save special regs */ +	stw     r8, _MSR(r1) +	stw     r9, _CTR(r1) +	stw     r10, _XER(r1) +	stw     r11, _CCR(r1) + +	/* Load &pt_regs in r6 for call below */ +	addi    r6, r1, STACK_FRAME_OVERHEAD + +	/* ftrace_call(r3, r4, r5, r6) */ +.globl ftrace_regs_call +ftrace_regs_call: +	bl	ftrace_stub +	nop + +	/* Load ctr with the possibly modified NIP */ +	lwz	r3, _NIP(r1) +	mtctr	r3 + +	/* Restore gprs */ +	lmw	r2, GPR2(r1) + +	/* Restore possibly modified LR */ +	lwz	r0, _LINK(r1) +	mtlr	r0 + +	/* Pop our stack frame */ +	addi r1, r1, INT_FRAME_SIZE + +	b	ftrace_caller_common +  #ifdef CONFIG_FUNCTION_GRAPH_TRACER  _GLOBAL(ftrace_graph_caller) +	stwu	r1,-48(r1) +	stw	r3, 12(r1) +	stw	r4, 16(r1) +	stw	r5, 20(r1) +	stw	r6, 24(r1) +	stw	r7, 28(r1) +	stw	r8, 32(r1) +	stw	r9, 36(r1) +	stw	r10,40(r1) +  	addi	r5, r1, 48 -	/* load r4 with local address */ -	lwz	r4, 44(r1) +	mfctr	r4		/* ftrace_caller has moved local addr here */ +	stw	r4, 44(r1) +	mflr	r3		/* ftrace_caller has restored LR from stack */  	subi	r4, r4, MCOUNT_INSN_SIZE -	/* Grab the LR out of the caller stack frame */ -	lwz	r3,52(r1) -  	bl	prepare_ftrace_return  	nop @@ -66,9 +146,21 @@ _GLOBAL(ftrace_graph_caller)           * Change the LR in the callers stack frame to this.           */  	stw	r3,52(r1) +	mtlr	r3 +	lwz	r0,44(r1) +	mtctr	r0 + +	lwz	r3, 12(r1) +	lwz	r4, 16(r1) +	lwz	r5, 20(r1) +	lwz	r6, 24(r1) +	lwz	r7, 28(r1) +	lwz	r8, 32(r1) +	lwz	r9, 36(r1) +	lwz	r10,40(r1) + +	addi	r1, r1, 48 -	MCOUNT_RESTORE_FRAME -	/* old link register ends up in ctr reg */  	bctr  _GLOBAL(return_to_handler) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index f9fd5f743eba..d636fc755f60 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -41,15 +41,14 @@ _GLOBAL(ftrace_regs_caller)  	/* Save all gprs to pt_regs */  	SAVE_GPR(0, r1) -	SAVE_10GPRS(2, r1) +	SAVE_GPRS(2, 11, r1)  	/* Ok to continue? 
*/  	lbz	r3, PACA_FTRACE_ENABLED(r13)  	cmpdi	r3, 0  	beq	ftrace_no_trace -	SAVE_10GPRS(12, r1) -	SAVE_10GPRS(22, r1) +	SAVE_GPRS(12, 31, r1)  	/* Save previous stack pointer (r1) */  	addi	r8, r1, SWITCH_FRAME_SIZE @@ -108,10 +107,8 @@ ftrace_regs_call:  #endif  	/* Restore gprs */ -	REST_GPR(0,r1) -	REST_10GPRS(2,r1) -	REST_10GPRS(12,r1) -	REST_10GPRS(22,r1) +	REST_GPR(0, r1) +	REST_GPRS(2, 31, r1)  	/* Restore possibly modified LR */  	ld	r0, _LINK(r1) @@ -157,7 +154,7 @@ _GLOBAL(ftrace_caller)  	stdu	r1, -SWITCH_FRAME_SIZE(r1)  	/* Save all gprs to pt_regs */ -	SAVE_8GPRS(3, r1) +	SAVE_GPRS(3, 10, r1)  	lbz	r3, PACA_FTRACE_ENABLED(r13)  	cmpdi	r3, 0 @@ -194,7 +191,7 @@ ftrace_call:  	mtctr	r3  	/* Restore gprs */ -	REST_8GPRS(3,r1) +	REST_GPRS(3, 10, r1)  	/* Restore callee's TOC */  	ld	r2, 24(r1) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 11741703d26e..a08bb7cefdc5 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -245,7 +245,7 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,  	if (panic_on_oops)  		panic("Fatal exception"); -	do_exit(signr); +	make_task_dead(signr);  }  NOKPROBE_SYMBOL(oops_end); @@ -792,9 +792,9 @@ int machine_check_generic(struct pt_regs *regs)  void die_mce(const char *str, struct pt_regs *regs, long err)  {  	/* -	 * The machine check wants to kill the interrupted context, but -	 * do_exit() checks for in_interrupt() and panics in that case, so -	 * exit the irq/nmi before calling die. +	 * The machine check wants to kill the interrupted context, +	 * but make_task_dead() checks for in_interrupt() and panics +	 * in that case, so exit the irq/nmi before calling die.  	 */  	if (in_nmi())  		nmi_exit(); diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 8513aa49614e..d3942de254c6 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -84,7 +84,7 @@ static int udbg_uart_getc(void)  	return udbg_uart_in(UART_RBR);  } -static void udbg_use_uart(void) +static void __init udbg_use_uart(void)  {  	udbg_putc = udbg_uart_putc;  	udbg_flush = udbg_uart_flush; @@ -92,7 +92,7 @@ static void udbg_use_uart(void)  	udbg_getc_poll = udbg_uart_getc_poll;  } -void udbg_uart_setup(unsigned int speed, unsigned int clock) +void __init udbg_uart_setup(unsigned int speed, unsigned int clock)  {  	unsigned int dll, base_bauds; @@ -121,7 +121,7 @@ void udbg_uart_setup(unsigned int speed, unsigned int clock)  	udbg_uart_out(UART_FCR, 0x7);  } -unsigned int udbg_probe_uart_speed(unsigned int clock) +unsigned int __init udbg_probe_uart_speed(unsigned int clock)  {  	unsigned int dll, dlm, divisor, prescaler, speed;  	u8 old_lcr; @@ -172,7 +172,7 @@ static void udbg_uart_out_pio(unsigned int reg, u8 data)  	outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride));  } -void udbg_uart_init_pio(unsigned long port, unsigned int stride) +void __init udbg_uart_init_pio(unsigned long port, unsigned int stride)  {  	if (!port)  		return; @@ -194,7 +194,7 @@ static void udbg_uart_out_mmio(unsigned int reg, u8 data)  } -void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) +void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)  {  	if (!addr)  		return; diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c index ae632569446f..fd9432875ebc 100644 --- a/arch/powerpc/kernel/vecemu.c +++ b/arch/powerpc/kernel/vecemu.c @@ -261,7 +261,7 @@ static unsigned int rfin(unsigned int x)  int emulate_altivec(struct 
pt_regs *regs)  { -	struct ppc_inst instr; +	ppc_inst_t instr;  	unsigned int i, word;  	unsigned int va, vb, vc, vd;  	vector128 *vrs; diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index ba03eedfdcd8..5cc24d8cce94 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -47,6 +47,10 @@ EXPORT_SYMBOL(store_vr_state)   */  _GLOBAL(load_up_altivec)  	mfmsr	r5			/* grab the current MSR */ +#ifdef CONFIG_PPC_BOOK3S_64 +	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */ +	ori	r5,r5,MSR_RI +#endif  	oris	r5,r5,MSR_VEC@h  	MTMSRD(r5)			/* enable use of AltiVec now */  	isync @@ -126,6 +130,12 @@ _GLOBAL(load_up_vsx)  	andis.	r5,r12,MSR_VEC@h  	beql+	load_up_altivec		/* skip if already loaded */ +#ifdef CONFIG_PPC_BOOK3S_64 +	/* interrupt doesn't set MSR[RI] and HPT can fault on current access */ +	li	r5,MSR_RI +	mtmsrd	r5,1 +#endif +  	ld	r4,PACACURRENT(r13)  	addi	r4,r4,THREAD		/* Get THREAD */  	li	r6,1 diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 18e42c74abdd..2bcca818136a 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -322,10 +322,6 @@ SECTIONS  #ifdef CONFIG_PPC32  	.data : AT(ADDR(.data) - LOAD_OFFSET) {  		DATA_DATA -#ifdef CONFIG_UBSAN -		*(.data..Lubsan_data*) -		*(.data..Lubsan_type*) -#endif  		*(.data.rel*)  		*(SDATA_MAIN)  		*(.sdata2) @@ -336,24 +332,18 @@ SECTIONS  #else  	.data : AT(ADDR(.data) - LOAD_OFFSET) {  		DATA_DATA -#ifdef CONFIG_UBSAN -		*(.data..Lubsan_data*) -		*(.data..Lubsan_type*) -#endif  		*(.data.rel*)  		*(.toc1)  		*(.branch_lt)  	} -	. = ALIGN(256); -	.got : AT(ADDR(.got) - LOAD_OFFSET) { -		__toc_start = .; +	.got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) { +		*(.got)  #ifndef CONFIG_RELOCATABLE  		__prom_init_toc_start = .; -		arch/powerpc/kernel/prom_init.o*(.toc .got) +		arch/powerpc/kernel/prom_init.o*(.toc)  		__prom_init_toc_end = .;  #endif -		*(.got)  		*(.toc)  	}  #endif diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 3fa6d240bade..bfc27496fe7e 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -85,10 +85,37 @@ static DEFINE_PER_CPU(u64, wd_timer_tb);  /* SMP checker bits */  static unsigned long __wd_smp_lock; +static unsigned long __wd_reporting; +static unsigned long __wd_nmi_output;  static cpumask_t wd_smp_cpus_pending;  static cpumask_t wd_smp_cpus_stuck;  static u64 wd_smp_last_reset_tb; +/* + * Try to take the exclusive watchdog action / NMI IPI / printing lock. + * wd_smp_lock must be held. If this fails, we should return and wait + * for the watchdog to kick in again (or another CPU to trigger it). + * + * Importantly, if hardlockup_panic is set, wd_try_report failure should + * not delay the panic, because whichever other CPU is reporting will + * call panic. + */ +static bool wd_try_report(void) +{ +	if (__wd_reporting) +		return false; +	__wd_reporting = 1; +	return true; +} + +/* End printing after successful wd_try_report. wd_smp_lock not required. */ +static void wd_end_reporting(void) +{ +	smp_mb(); /* End printing "critical section" */ +	WARN_ON_ONCE(__wd_reporting == 0); +	WRITE_ONCE(__wd_reporting, 0); +} +  static inline void wd_smp_lock(unsigned long *flags)  {  	/* @@ -128,109 +155,182 @@ static void wd_lockup_ipi(struct pt_regs *regs)  	else  		dump_stack(); +	/* +	 * __wd_nmi_output must be set after we printk from NMI context. 
+	 * +	 * printk from NMI context defers printing to the console to irq_work. +	 * If that NMI was taken in some code that is hard-locked, then irqs +	 * are disabled so irq_work will never fire. That can result in the +	 * hard lockup messages being delayed (indefinitely, until something +	 * else kicks the console drivers). +	 * +	 * Setting __wd_nmi_output will cause another CPU to notice and kick +	 * the console drivers for us. +	 * +	 * xchg is not needed here (it could be a smp_mb and store), but xchg +	 * gives the memory ordering and atomicity required. +	 */ +	xchg(&__wd_nmi_output, 1); +  	/* Do not panic from here because that can recurse into NMI IPI layer */  } -static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) +static bool set_cpu_stuck(int cpu)  { -	cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); -	cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); +	cpumask_set_cpu(cpu, &wd_smp_cpus_stuck); +	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); +	/* +	 * See wd_smp_clear_cpu_pending() +	 */ +	smp_mb();  	if (cpumask_empty(&wd_smp_cpus_pending)) { -		wd_smp_last_reset_tb = tb; +		wd_smp_last_reset_tb = get_tb();  		cpumask_andnot(&wd_smp_cpus_pending,  				&wd_cpus_enabled,  				&wd_smp_cpus_stuck); +		return true;  	} -} -static void set_cpu_stuck(int cpu, u64 tb) -{ -	set_cpumask_stuck(cpumask_of(cpu), tb); +	return false;  } -static void watchdog_smp_panic(int cpu, u64 tb) +static void watchdog_smp_panic(int cpu)  { +	static cpumask_t wd_smp_cpus_ipi; // protected by reporting  	unsigned long flags; +	u64 tb, last_reset;  	int c;  	wd_smp_lock(&flags);  	/* Double check some things under lock */ -	if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb) +	tb = get_tb(); +	last_reset = wd_smp_last_reset_tb; +	if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb)  		goto out;  	if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))  		goto out; -	if (cpumask_weight(&wd_smp_cpus_pending) == 0) +	if (!wd_try_report()) +		goto out; +	for_each_online_cpu(c) { +		if (!cpumask_test_cpu(c, &wd_smp_cpus_pending)) +			continue; +		if (c == cpu) +			continue; // should not happen + +		__cpumask_set_cpu(c, &wd_smp_cpus_ipi); +		if (set_cpu_stuck(c)) +			break; +	} +	if (cpumask_empty(&wd_smp_cpus_ipi)) { +		wd_end_reporting();  		goto out; +	} +	wd_smp_unlock(&flags);  	pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n", -		 cpu, cpumask_pr_args(&wd_smp_cpus_pending)); +		 cpu, cpumask_pr_args(&wd_smp_cpus_ipi));  	pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n", -		 cpu, tb, wd_smp_last_reset_tb, -		 tb_to_ns(tb - wd_smp_last_reset_tb) / 1000000); +		 cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000);  	if (!sysctl_hardlockup_all_cpu_backtrace) {  		/*  		 * Try to trigger the stuck CPUs, unless we are going to  		 * get a backtrace on all of them anyway.  		 */ -		for_each_cpu(c, &wd_smp_cpus_pending) { -			if (c == cpu) -				continue; +		for_each_cpu(c, &wd_smp_cpus_ipi) {  			smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000); +			__cpumask_clear_cpu(c, &wd_smp_cpus_ipi);  		} -	} - -	/* Take the stuck CPUs out of the watch group */ -	set_cpumask_stuck(&wd_smp_cpus_pending, tb); - -	wd_smp_unlock(&flags); - -	if (sysctl_hardlockup_all_cpu_backtrace) +	} else {  		trigger_allbutself_cpu_backtrace(); - -	/* -	 * Force flush any remote buffers that might be stuck in IRQ context -	 * and therefore could not run their irq_work. 
-	 */ -	printk_trigger_flush(); +		cpumask_clear(&wd_smp_cpus_ipi); +	}  	if (hardlockup_panic)  		nmi_panic(NULL, "Hard LOCKUP"); +	wd_end_reporting(); +  	return;  out:  	wd_smp_unlock(&flags);  } -static void wd_smp_clear_cpu_pending(int cpu, u64 tb) +static void wd_smp_clear_cpu_pending(int cpu)  {  	if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {  		if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {  			struct pt_regs *regs = get_irq_regs();  			unsigned long flags; -			wd_smp_lock(&flags); -  			pr_emerg("CPU %d became unstuck TB:%lld\n", -				 cpu, tb); +				 cpu, get_tb());  			print_irqtrace_events(current);  			if (regs)  				show_regs(regs);  			else  				dump_stack(); +			wd_smp_lock(&flags);  			cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);  			wd_smp_unlock(&flags); +		} else { +			/* +			 * The last CPU to clear pending should have reset the +			 * watchdog so we generally should not find it empty +			 * here if our CPU was clear. However it could happen +			 * due to a rare race with another CPU taking the +			 * last CPU out of the mask concurrently. +			 * +			 * We can't add a warning for it. But just in case +			 * there is a problem with the watchdog that is causing +			 * the mask to not be reset, try to kick it along here. +			 */ +			if (unlikely(cpumask_empty(&wd_smp_cpus_pending))) +				goto none_pending;  		}  		return;  	} + +	/* +	 * All other updates to wd_smp_cpus_pending are performed under +	 * wd_smp_lock. All of them are atomic except the case where the +	 * mask becomes empty and is reset. This will not happen here because +	 * cpu was tested to be in the bitmap (above), and a CPU only clears +	 * its own bit. _Except_ in the case where another CPU has detected a +	 * hard lockup on our CPU and takes us out of the pending mask. So in +	 * normal operation there will be no race here, no problem. +	 * +	 * In the lockup case, this atomic clear-bit vs a store that refills +	 * other bits in the accessed word will not be a problem. The bit clear +	 * is atomic so it will not cause the store to get lost, and the store +	 * will never set this bit so it will not overwrite the bit clear. The +	 * only way for a stuck CPU to return to the pending bitmap is to +	 * become unstuck itself. +	 */  	cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + +	/* +	 * Order the store to clear pending with the load(s) that check all +	 * words in the pending mask are empty. This orders +	 * with the same barrier on another CPU. This prevents two CPUs +	 * clearing the last 2 pending bits, but neither seeing the other's +	 * store when checking if the mask is empty, and missing an empty +	 * mask, which ends with a false positive. +	 */ +	smp_mb();  	if (cpumask_empty(&wd_smp_cpus_pending)) {  		unsigned long flags; +none_pending: +		/* +		 * Double check under lock because more than one CPU could see +		 * a clear mask with the lockless check after clearing their +		 * pending bits. 
+		 */  	wd_smp_lock(&flags);  	if (cpumask_empty(&wd_smp_cpus_pending)) { -			wd_smp_last_reset_tb = tb; +			wd_smp_last_reset_tb = get_tb();  			cpumask_andnot(&wd_smp_cpus_pending,  					&wd_cpus_enabled,  					&wd_smp_cpus_stuck); @@ -245,10 +345,21 @@ static void watchdog_timer_interrupt(int cpu)  	per_cpu(wd_timer_tb, cpu) = tb; -	wd_smp_clear_cpu_pending(cpu, tb); +	wd_smp_clear_cpu_pending(cpu);  	if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb) -		watchdog_smp_panic(cpu, tb); +		watchdog_smp_panic(cpu); + +	if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) { +		/* +		 * Something has called printk from NMI context. It might be +		 * stuck, so this triggers a flush that will get that +		 * printk output to the console. +		 * +		 * See wd_lockup_ipi. +		 */ +		printk_trigger_flush(); +	}  }  DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) @@ -267,12 +378,27 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)  	tb = get_tb();  	if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) { +		/* +		 * Taking wd_smp_lock here means it is a soft-NMI lock, which +		 * means we can't take any regular or irqsafe spin locks while +		 * holding this lock. This is why timers can't printk while +		 * holding the lock. +		 */  		wd_smp_lock(&flags);  		if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {  			wd_smp_unlock(&flags);  			return 0;  		} -		set_cpu_stuck(cpu, tb); +		if (!wd_try_report()) { +			wd_smp_unlock(&flags); +			/* Couldn't report, try again in 100ms */ +			mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000); +			return 0; +		} + +		set_cpu_stuck(cpu); + +		wd_smp_unlock(&flags);  		pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n",  			 cpu, (void *)regs->nip); @@ -283,14 +409,21 @@ DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)  		print_irqtrace_events(current);  		show_regs(regs); -		wd_smp_unlock(&flags); +		xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi  		if (sysctl_hardlockup_all_cpu_backtrace)  			trigger_allbutself_cpu_backtrace();  		if (hardlockup_panic)  			nmi_panic(regs, "Hard LOCKUP"); + +		wd_end_reporting();  	} +	/* +	 * We are okay to change DEC in soft_nmi_interrupt because the masked +	 * handler has marked a DEC as pending, so the timer interrupt will be +	 * replayed as soon as local irqs are enabled again. +	 */  	if (wd_panic_timeout_tb < 0x7fffffff)  		mtspr(SPRN_DEC, wd_panic_timeout_tb); @@ -318,11 +451,15 @@ void arch_touch_nmi_watchdog(void)  {  	unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;  	int cpu = smp_processor_id();  	u64 tb; +	if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) +		return; + +	tb = get_tb();  	if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {  		per_cpu(wd_timer_tb, cpu) = tb; -		wd_smp_clear_cpu_pending(cpu, tb); +		wd_smp_clear_cpu_pending(cpu);  	}  }  EXPORT_SYMBOL(arch_touch_nmi_watchdog); @@ -380,7 +517,7 @@ static void stop_watchdog(void *arg)  	cpumask_clear_cpu(cpu, &wd_cpus_enabled);  	wd_smp_unlock(&flags); -	wd_smp_clear_cpu_pending(cpu, get_tb()); +	wd_smp_clear_cpu_pending(cpu);  }  static int stop_watchdog_on_cpu(unsigned int cpu) |
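The set_dec_or_work() helper factored out in the arch/powerpc/kernel/time.c hunks above closes a small race: irq_work can be raised between choosing the next decrementer value and programming it, so after writing the decrementer the pending flag is re-checked and the decrementer is re-armed to 1 so the work runs without waiting a full timer period. The following is a minimal userspace sketch of that pattern only, assuming C11 atomics as stand-ins for the PACA irq_work flag and the DEC register; none of these userspace names come from the patch.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool irq_work_pending;	/* stand-in for paca->irq_work_pending */
static unsigned long long dec_value;	/* stand-in for the decrementer (DEC) */

static void set_dec(unsigned long long val)
{
	dec_value = val;
}

static void set_dec_or_work(unsigned long long val)
{
	set_dec(val);
	/* We may have raced with new irq work raised after 'val' was chosen;
	 * re-arm the timer to fire as soon as possible so the work runs. */
	if (atomic_load(&irq_work_pending))
		set_dec(1);
}

int main(void)
{
	/* No irq work pending: the full period is programmed. */
	set_dec_or_work(1000000);
	printf("dec = %llu\n", dec_value);

	/* Work raised concurrently: the re-check shortens the period to 1. */
	atomic_store(&irq_work_pending, true);
	set_dec_or_work(1000000);
	printf("dec = %llu\n", dec_value);
	return 0;
}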
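The memory-ordering comments added to wd_smp_clear_cpu_pending() in the watchdog.c hunks above can also be seen in isolation. In the sketch below, two threads each clear their own bit of a shared pending mask, issue a full fence (the kernel's smp_mb()), then check whether the mask is empty, double-checking under a lock before resetting it, so when the last two bits are cleared concurrently at least one thread still observes the empty mask and exactly one reset runs. This is an illustrative userspace model only, not code from the patch: a single-word mask, C11 atomics and a pthread mutex stand in for the kernel cpumask, smp_mb() and wd_smp_lock; in the kernel the barrier matters because the cpumask can span multiple words.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong pending;				/* stand-in for wd_smp_cpus_pending */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;	/* stand-in for wd_smp_lock */
static unsigned long resets;				/* counts watchdog mask resets */

static void clear_cpu_pending(int cpu)
{
	/* Atomically clear our own bit (cpumask_clear_cpu). */
	atomic_fetch_and(&pending, ~(1UL << cpu));

	/* The kernel's smp_mb(): order the clear against the emptiness check. */
	atomic_thread_fence(memory_order_seq_cst);

	if (atomic_load(&pending) == 0) {
		/* Double check under the lock, as the patch does, so only one
		 * of the racing CPUs performs the reset. */
		pthread_mutex_lock(&lock);
		if (atomic_load(&pending) == 0) {
			resets++;			/* "reset the watchdog" */
			atomic_store(&pending, 0x3UL);	/* refill enabled CPUs */
		}
		pthread_mutex_unlock(&lock);
	}
}

static void *cpu_thread(void *arg)
{
	clear_cpu_pending((int)(long)arg);
	return NULL;
}

int main(void)
{
	pthread_t t0, t1;

	atomic_store(&pending, 0x3UL);		/* CPUs 0 and 1 both pending */
	pthread_create(&t0, NULL, cpu_thread, (void *)0L);
	pthread_create(&t1, NULL, cpu_thread, (void *)1L);
	pthread_join(t0, NULL);
	pthread_join(t1, NULL);

	/* At least one thread saw the empty mask, so exactly one reset ran. */
	printf("resets = %lu, pending = %#lx\n", resets, atomic_load(&pending));
	return 0;
}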