diff options
Diffstat (limited to 'arch/powerpc/kernel')
33 files changed, 1054 insertions, 439 deletions
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d24a59a98c0c..9f14d95b8b32 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -185,7 +185,9 @@ int main(void)  				 offsetof(struct task_struct, thread_info));  	OFFSET(PACASAVEDMSR, paca_struct, saved_msr);  	OFFSET(PACAR1, paca_struct, saved_r1); +#ifndef CONFIG_PPC_KERNEL_PCREL  	OFFSET(PACATOC, paca_struct, kernel_toc); +#endif  	OFFSET(PACAKBASE, paca_struct, kernelbase);  	OFFSET(PACAKMSR, paca_struct, kernel_msr);  #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 2769889219bf..19e46fd623b0 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -235,7 +235,7 @@ int __init btext_find_display(int allow_nonstdout)  		return rc;  	for_each_node_by_type(np, "display") { -		if (of_get_property(np, "linux,opened", NULL)) { +		if (of_property_read_bool(np, "linux,opened")) {  			printk("trying %pOF ...\n", np);  			rc = btext_initialize(np);  			printk("result: %d\n", rc); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 5604c9a1ac22..47f0dd9a45ad 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -183,12 +183,11 @@ syscall_exit_finish:  ret_from_fork:  	REST_NVGPRS(r1)  	bl	schedule_tail -	li	r3,0 +	li	r3,0	/* fork() return value */  	b	ret_from_syscall -	.globl	ret_from_kernel_thread -ret_from_kernel_thread: -	REST_NVGPRS(r1) +	.globl	ret_from_kernel_user_thread +ret_from_kernel_user_thread:  	bl	schedule_tail  	mtctr	r14  	mr	r3,r15 @@ -197,6 +196,22 @@ ret_from_kernel_thread:  	li	r3,0  	b	ret_from_syscall +	.globl	start_kernel_thread +start_kernel_thread: +	bl	schedule_tail +	mtctr	r14 +	mr	r3,r15 +	PPC440EP_ERR42 +	bctrl +	/* +	 * This must not return. We actually want to BUG here, not WARN, +	 * because BUG will exit the process which is what the kernel thread +	 * should have done, which may give some hope of continuing. +	 */ +100:	trap +	EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0 + +  /*   * This routine switches between two different tasks.  The process   * state of one is saved on its kernel stack.  Then the state diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6441a1ba57ac..c33c8ebf8641 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1075,7 +1075,7 @@ EXC_COMMON_BEGIN(system_reset_common)  	__GEN_COMMON_BODY system_reset  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	system_reset_exception +	bl	CFUNC(system_reset_exception)  	/* Clear MSR_RI before setting SRR0 and SRR1. */  	li	r9,0 @@ -1223,9 +1223,9 @@ BEGIN_FTR_SECTION  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)  	addi	r3,r1,STACK_INT_FRAME_REGS  BEGIN_FTR_SECTION -	bl	machine_check_early_boot +	bl	CFUNC(machine_check_early_boot)  END_FTR_SECTION(0, 1)     // nop out after boot -	bl	machine_check_early +	bl	CFUNC(machine_check_early)  	std	r3,RESULT(r1)	/* Save result */  	ld	r12,_MSR(r1) @@ -1286,7 +1286,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)  	 * Queue up the MCE event so that we can log it later, while  	 * returning from kernel or opal call.  	 */ -	bl	machine_check_queue_event +	bl	CFUNC(machine_check_queue_event)  	MACHINE_CHECK_HANDLER_WINDUP  	RFI_TO_KERNEL @@ -1312,7 +1312,7 @@ EXC_COMMON_BEGIN(machine_check_common)  	 */  	GEN_COMMON machine_check  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	machine_check_exception_async +	bl	CFUNC(machine_check_exception_async)  	b	interrupt_return_srr @@ -1322,7 +1322,7 @@ EXC_COMMON_BEGIN(machine_check_common)   * done. Queue the event then call the idle code to do the wake up.   */  EXC_COMMON_BEGIN(machine_check_idle_common) -	bl	machine_check_queue_event +	bl	CFUNC(machine_check_queue_event)  	/*  	 * GPR-loss wakeups are relatively straightforward, because the @@ -1361,7 +1361,7 @@ EXC_COMMON_BEGIN(unrecoverable_mce)  BEGIN_FTR_SECTION  	li	r10,0 /* clear MSR_RI */  	mtmsrd	r10,1 -	bl	disable_machine_check +	bl	CFUNC(disable_machine_check)  END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)  	ld	r10,PACAKMSR(r13)  	li	r3,MSR_ME @@ -1378,14 +1378,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)  	 * the early handler which is a true NMI.  	 */  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	machine_check_exception +	bl	CFUNC(machine_check_exception)  	/*  	 * We will not reach here. Even if we did, there is no way out.  	 * Call unrecoverable_exception and die.  	 */  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	unrecoverable_exception +	bl	CFUNC(unrecoverable_exception)  	b	. @@ -1440,16 +1440,16 @@ EXC_COMMON_BEGIN(data_access_common)  	bne-	1f  #ifdef CONFIG_PPC_64S_HASH_MMU  BEGIN_MMU_FTR_SECTION -	bl	do_hash_fault +	bl	CFUNC(do_hash_fault)  MMU_FTR_SECTION_ELSE -	bl	do_page_fault +	bl	CFUNC(do_page_fault)  ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)  #else -	bl	do_page_fault +	bl	CFUNC(do_page_fault)  #endif  	b	interrupt_return_srr -1:	bl	do_break +1:	bl	CFUNC(do_break)  	/*  	 * do_break() may have changed the NV GPRS while handling a breakpoint.  	 * If so, we need to restore them with their updated values. @@ -1493,7 +1493,7 @@ EXC_COMMON_BEGIN(data_access_slb_common)  BEGIN_MMU_FTR_SECTION  	/* HPT case, do SLB fault */  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	do_slb_fault +	bl	CFUNC(do_slb_fault)  	cmpdi	r3,0  	bne-	1f  	b	fast_interrupt_return_srr @@ -1507,7 +1507,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)  #endif  	std	r3,RESULT(r1)  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	do_bad_segment_interrupt +	bl	CFUNC(do_bad_segment_interrupt)  	b	interrupt_return_srr @@ -1541,12 +1541,12 @@ EXC_COMMON_BEGIN(instruction_access_common)  	addi	r3,r1,STACK_INT_FRAME_REGS  #ifdef CONFIG_PPC_64S_HASH_MMU  BEGIN_MMU_FTR_SECTION -	bl	do_hash_fault +	bl	CFUNC(do_hash_fault)  MMU_FTR_SECTION_ELSE -	bl	do_page_fault +	bl	CFUNC(do_page_fault)  ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)  #else -	bl	do_page_fault +	bl	CFUNC(do_page_fault)  #endif  	b	interrupt_return_srr @@ -1581,7 +1581,7 @@ EXC_COMMON_BEGIN(instruction_access_slb_common)  BEGIN_MMU_FTR_SECTION  	/* HPT case, do SLB fault */  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	do_slb_fault +	bl	CFUNC(do_slb_fault)  	cmpdi	r3,0  	bne-	1f  	b	fast_interrupt_return_srr @@ -1595,7 +1595,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)  #endif  	std	r3,RESULT(r1)  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	do_bad_segment_interrupt +	bl	CFUNC(do_bad_segment_interrupt)  	b	interrupt_return_srr @@ -1649,7 +1649,7 @@ EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)  EXC_COMMON_BEGIN(hardware_interrupt_common)  	GEN_COMMON hardware_interrupt  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	do_IRQ +	bl	CFUNC(do_IRQ)  	BEGIN_FTR_SECTION  	b	interrupt_return_hsrr  	FTR_SECTION_ELSE @@ -1679,7 +1679,7 @@ EXC_VIRT_END(alignment, 0x4600, 0x100)  EXC_COMMON_BEGIN(alignment_common)  	GEN_COMMON alignment  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	alignment_exception +	bl	CFUNC(alignment_exception)  	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */  	b	interrupt_return_srr @@ -1745,7 +1745,7 @@ EXC_COMMON_BEGIN(program_check_common)  .Ldo_program_check:  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	program_check_exception +	bl	CFUNC(program_check_exception)  	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */  	b	interrupt_return_srr @@ -1777,7 +1777,7 @@ EXC_COMMON_BEGIN(fp_unavailable_common)  	GEN_COMMON fp_unavailable  	bne	1f			/* if from user, just load it up */  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	kernel_fp_unavailable_exception +	bl	CFUNC(kernel_fp_unavailable_exception)  0:	trap  	EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0  1: @@ -1790,12 +1790,12 @@ BEGIN_FTR_SECTION  	bne-	2f  END_FTR_SECTION_IFSET(CPU_FTR_TM)  #endif -	bl	load_up_fpu +	bl	CFUNC(load_up_fpu)  	b	fast_interrupt_return_srr  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM  2:	/* User process was in a transaction */  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	fp_unavailable_tm +	bl	CFUNC(fp_unavailable_tm)  	b	interrupt_return_srr  #endif @@ -1839,7 +1839,7 @@ EXC_VIRT_END(decrementer, 0x4900, 0x80)  EXC_COMMON_BEGIN(decrementer_common)  	GEN_COMMON decrementer  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	timer_interrupt +	bl	CFUNC(timer_interrupt)  	b	interrupt_return_srr @@ -1925,9 +1925,9 @@ EXC_COMMON_BEGIN(doorbell_super_common)  	GEN_COMMON doorbell_super  	addi	r3,r1,STACK_INT_FRAME_REGS  #ifdef CONFIG_PPC_DOORBELL -	bl	doorbell_exception +	bl	CFUNC(doorbell_exception)  #else -	bl	unknown_async_exception +	bl	CFUNC(unknown_async_exception)  #endif  	b	interrupt_return_srr @@ -2091,7 +2091,7 @@ EXC_VIRT_END(single_step, 0x4d00, 0x100)  EXC_COMMON_BEGIN(single_step_common)  	GEN_COMMON single_step  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	single_step_exception +	bl	CFUNC(single_step_exception)  	b	interrupt_return_srr @@ -2126,9 +2126,9 @@ EXC_COMMON_BEGIN(h_data_storage_common)  	GEN_COMMON h_data_storage  	addi    r3,r1,STACK_INT_FRAME_REGS  BEGIN_MMU_FTR_SECTION -	bl      do_bad_page_fault_segv +	bl	CFUNC(do_bad_page_fault_segv)  MMU_FTR_SECTION_ELSE -	bl      unknown_exception +	bl	CFUNC(unknown_exception)  ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)  	b       interrupt_return_hsrr @@ -2154,7 +2154,7 @@ EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)  EXC_COMMON_BEGIN(h_instr_storage_common)  	GEN_COMMON h_instr_storage  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	unknown_exception +	bl	CFUNC(unknown_exception)  	b	interrupt_return_hsrr @@ -2177,7 +2177,7 @@ EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)  EXC_COMMON_BEGIN(emulation_assist_common)  	GEN_COMMON emulation_assist  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	emulation_assist_interrupt +	bl	CFUNC(emulation_assist_interrupt)  	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */  	b	interrupt_return_hsrr @@ -2237,7 +2237,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)  	__GEN_COMMON_BODY hmi_exception_early  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	hmi_exception_realmode +	bl	CFUNC(hmi_exception_realmode)  	cmpdi	cr0,r3,0  	bne	1f @@ -2255,7 +2255,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)  EXC_COMMON_BEGIN(hmi_exception_common)  	GEN_COMMON hmi_exception  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	handle_hmi_exception +	bl	CFUNC(handle_hmi_exception)  	b	interrupt_return_hsrr @@ -2290,9 +2290,9 @@ EXC_COMMON_BEGIN(h_doorbell_common)  	GEN_COMMON h_doorbell  	addi	r3,r1,STACK_INT_FRAME_REGS  #ifdef CONFIG_PPC_DOORBELL -	bl	doorbell_exception +	bl	CFUNC(doorbell_exception)  #else -	bl	unknown_async_exception +	bl	CFUNC(unknown_async_exception)  #endif  	b	interrupt_return_hsrr @@ -2325,7 +2325,7 @@ EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)  EXC_COMMON_BEGIN(h_virt_irq_common)  	GEN_COMMON h_virt_irq  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	do_IRQ +	bl	CFUNC(do_IRQ)  	b	interrupt_return_hsrr @@ -2374,10 +2374,10 @@ EXC_COMMON_BEGIN(performance_monitor_common)  	lbz	r4,PACAIRQSOFTMASK(r13)  	cmpdi	r4,IRQS_ENABLED  	bne	1f -	bl	performance_monitor_exception_async +	bl	CFUNC(performance_monitor_exception_async)  	b	interrupt_return_srr  1: -	bl	performance_monitor_exception_nmi +	bl	CFUNC(performance_monitor_exception_nmi)  	/* Clear MSR_RI before setting SRR0 and SRR1. */  	li	r9,0  	mtmsrd	r9,1 @@ -2421,19 +2421,19 @@ BEGIN_FTR_SECTION  	bne-	2f    END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)  #endif -	bl	load_up_altivec +	bl	CFUNC(load_up_altivec)  	b	fast_interrupt_return_srr  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM  2:	/* User process was in a transaction */  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	altivec_unavailable_tm +	bl	CFUNC(altivec_unavailable_tm)  	b	interrupt_return_srr  #endif  1:  END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)  #endif  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	altivec_unavailable_exception +	bl	CFUNC(altivec_unavailable_exception)  	b	interrupt_return_srr @@ -2475,14 +2475,14 @@ BEGIN_FTR_SECTION  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM  2:	/* User process was in a transaction */  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	vsx_unavailable_tm +	bl	CFUNC(vsx_unavailable_tm)  	b	interrupt_return_srr  #endif  1:  END_FTR_SECTION_IFSET(CPU_FTR_VSX)  #endif  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	vsx_unavailable_exception +	bl	CFUNC(vsx_unavailable_exception)  	b	interrupt_return_srr @@ -2509,7 +2509,7 @@ EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)  EXC_COMMON_BEGIN(facility_unavailable_common)  	GEN_COMMON facility_unavailable  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	facility_unavailable_exception +	bl	CFUNC(facility_unavailable_exception)  	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */  	b	interrupt_return_srr @@ -2537,7 +2537,7 @@ EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)  EXC_COMMON_BEGIN(h_facility_unavailable_common)  	GEN_COMMON h_facility_unavailable  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	facility_unavailable_exception +	bl	CFUNC(facility_unavailable_exception)  	/* XXX Shouldn't be necessary in practice */  	HANDLER_RESTORE_NVGPRS()  	b	interrupt_return_hsrr @@ -2568,7 +2568,7 @@ EXC_VIRT_NONE(0x5200, 0x100)  EXC_COMMON_BEGIN(cbe_system_error_common)  	GEN_COMMON cbe_system_error  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	cbe_system_error_exception +	bl	CFUNC(cbe_system_error_exception)  	b	interrupt_return_hsrr  #else /* CONFIG_CBE_RAS */ @@ -2599,7 +2599,7 @@ EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)  EXC_COMMON_BEGIN(instruction_breakpoint_common)  	GEN_COMMON instruction_breakpoint  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	instruction_breakpoint_exception +	bl	CFUNC(instruction_breakpoint_exception)  	b	interrupt_return_srr @@ -2721,7 +2721,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)  EXC_COMMON_BEGIN(denorm_exception_common)  	GEN_COMMON denorm_exception  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	unknown_exception +	bl	CFUNC(unknown_exception)  	b	interrupt_return_hsrr @@ -2738,7 +2738,7 @@ EXC_VIRT_NONE(0x5600, 0x100)  EXC_COMMON_BEGIN(cbe_maintenance_common)  	GEN_COMMON cbe_maintenance  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	cbe_maintenance_exception +	bl	CFUNC(cbe_maintenance_exception)  	b	interrupt_return_hsrr  #else /* CONFIG_CBE_RAS */ @@ -2764,10 +2764,10 @@ EXC_COMMON_BEGIN(altivec_assist_common)  	GEN_COMMON altivec_assist  	addi	r3,r1,STACK_INT_FRAME_REGS  #ifdef CONFIG_ALTIVEC -	bl	altivec_assist_exception +	bl	CFUNC(altivec_assist_exception)  	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */  #else -	bl	unknown_exception +	bl	CFUNC(unknown_exception)  #endif  	b	interrupt_return_srr @@ -2785,7 +2785,7 @@ EXC_VIRT_NONE(0x5800, 0x100)  EXC_COMMON_BEGIN(cbe_thermal_common)  	GEN_COMMON cbe_thermal  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	cbe_thermal_exception +	bl	CFUNC(cbe_thermal_exception)  	b	interrupt_return_hsrr  #else /* CONFIG_CBE_RAS */ @@ -2818,7 +2818,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)  	__GEN_COMMON_BODY soft_nmi  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	soft_nmi_interrupt +	bl	CFUNC(soft_nmi_interrupt)  	/* Clear MSR_RI before setting SRR0 and SRR1. */  	li	r9,0 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1febb56ebaeb..f132d8704263 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -76,6 +76,13 @@   *   2. The kernel is entered at __start   */ +/* + * boot_from_prom and prom_init run at the physical address. Everything + * after prom and kexec entry run at the virtual address (PAGE_OFFSET). + * Secondaries run at the virtual address from generic_secondary_common_init + * onward. + */ +  OPEN_FIXED_SECTION(first_256B, 0x0, 0x100)  USE_FIXED_SECTION(first_256B)  	/* @@ -303,13 +310,11 @@ _GLOBAL(fsl_secondary_thread_init)  	/* turn on 64-bit mode */  	bl	enable_64b_mode -	/* get a valid TOC pointer, wherever we're mapped at */ -	bl	relative_toc -	tovirt(r2,r2) -  	/* Book3E initialization */  	mr	r3,r24  	bl	book3e_secondary_thread_init +	bl	relative_toc +  	b	generic_secondary_common_init  #endif /* CONFIG_PPC_BOOK3E_64 */ @@ -325,22 +330,24 @@ _GLOBAL(fsl_secondary_thread_init)   */  _GLOBAL(generic_secondary_smp_init)  	FIXUP_ENDIAN + +	li	r13,0 + +	/* Poison TOC */ +	li	r2,-1 +  	mr	r24,r3  	mr	r25,r4  	/* turn on 64-bit mode */  	bl	enable_64b_mode -	/* get a valid TOC pointer, wherever we're mapped at */ -	bl	relative_toc -	tovirt(r2,r2) -  #ifdef CONFIG_PPC_BOOK3E_64  	/* Book3E initialization */  	mr	r3,r24  	mr	r4,r25  	bl	book3e_secondary_core_init - +	/* Now NIA and r2 are relocated to PAGE_OFFSET if not already */  /*   * After common core init has finished, check if the current thread is the   * one we wanted to boot. If not, start the specified thread and stop the @@ -378,6 +385,16 @@ _GLOBAL(generic_secondary_smp_init)  10:  	b	10b  20: +#else +	/* Now the MMU is off, can branch to our PAGE_OFFSET address */ +	bcl	20,31,$+4 +1:	mflr	r11 +	addi	r11,r11,(2f - 1b) +	tovirt(r11, r11) +	mtctr	r11 +	bctr +2: +	bl	relative_toc  #endif  generic_secondary_common_init: @@ -492,6 +509,8 @@ SYM_FUNC_START_LOCAL(start_initialization_book3s)  	/* Switch off MMU if not already off */  	bl	__mmu_off +	/* Now the MMU is off, can return to our PAGE_OFFSET address */ +	tovirt(r25,r25)  	mtlr	r25  	blr  SYM_FUNC_END(start_initialization_book3s) @@ -515,14 +534,8 @@ __start_initialization_multiplatform:  	/* Zero r13 (paca) so early program check / mce don't use it */  	li	r13,0 -	/* Get TOC pointer (current runtime address) */ -	bl	relative_toc - -	/* find out where we are now */ -	bcl	20,31,$+4 -0:	mflr	r26			/* r26 = runtime addr here */ -	addis	r26,r26,(_stext - 0b)@ha -	addi	r26,r26,(_stext - 0b)@l	/* current runtime base addr */ +	/* Poison TOC */ +	li	r2,-1  	/*  	 * Are we booted from a PROM Of-type client-interface ? @@ -540,16 +553,41 @@ __start_initialization_multiplatform:  	mr	r29,r9  #endif +	/* Get TOC pointer (current runtime address) */ +	bl	relative_toc + +	/* These functions return to the virtual (PAGE_OFFSET) address */  #ifdef CONFIG_PPC_BOOK3E_64  	bl	start_initialization_book3e  #else  	bl	start_initialization_book3s  #endif /* CONFIG_PPC_BOOK3E_64 */ + +	/* Get TOC pointer, virtual */ +	bl	relative_toc + +	/* find out where we are now */ + +	/* OPAL doesn't pass base address in r4, have to derive it. */ +	bcl	20,31,$+4 +0:	mflr	r26			/* r26 = runtime addr here */ +	addis	r26,r26,(_stext - 0b)@ha +	addi	r26,r26,(_stext - 0b)@l	/* current runtime base addr */ +  	b	__after_prom_start  __REF  __boot_from_prom:  #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE +	/* Get TOC pointer, non-virtual */ +	bl	relative_toc + +	/* find out where we are now */ +	bcl	20,31,$+4 +0:	mflr	r26			/* r26 = runtime addr here */ +	addis	r26,r26,(_stext - 0b)@ha +	addi	r26,r26,(_stext - 0b)@l	/* current runtime base addr */ +  	/* Save parameters */  	mr	r31,r3  	mr	r30,r4 @@ -579,7 +617,7 @@ __boot_from_prom:  	/* Do all of the interaction with OF client interface */  	mr	r8,r26 -	bl	prom_init +	bl	CFUNC(prom_init)  #endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */  	/* We never return. We also hit that trap if trying to boot @@ -590,18 +628,11 @@ __boot_from_prom:  __after_prom_start:  #ifdef CONFIG_RELOCATABLE  	/* process relocations for the final address of the kernel */ -	lis	r25,PAGE_OFFSET@highest	/* compute virtual base of kernel */ -	sldi	r25,r25,32 -#if defined(CONFIG_PPC_BOOK3E_64) -	tovirt(r26,r26)		/* on booke, we already run at PAGE_OFFSET */ -#endif  	lwz	r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) -#if defined(CONFIG_PPC_BOOK3E_64) -	tophys(r26,r26) -#endif  	cmplwi	cr0,r7,1	/* flagged to stay where we are ? */ -	bne	1f -	add	r25,r25,r26 +	mr	r25,r26		/* then use current kernel base */ +	beq	1f +	LOAD_REG_IMMEDIATE(r25, PAGE_OFFSET) /* else use static kernel base */  1:	mr	r3,r25  	bl	relocate  #if defined(CONFIG_PPC_BOOK3E_64) @@ -617,14 +648,8 @@ __after_prom_start:   *   * Note: This process overwrites the OF exception vectors.   */ -	li	r3,0			/* target addr */ -#ifdef CONFIG_PPC_BOOK3E_64 -	tovirt(r3,r3)		/* on booke, we already run at PAGE_OFFSET */ -#endif +	LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET)  	mr.	r4,r26			/* In some cases the loader may  */ -#if defined(CONFIG_PPC_BOOK3E_64) -	tovirt(r4,r4) -#endif  	beq	9f			/* have already put us at zero */  	li	r6,0x100		/* Start offset, the first 0x100 */  					/* bytes were copied earlier.	 */ @@ -635,9 +660,6 @@ __after_prom_start:   * variable __run_at_load, if it is set the kernel is treated as relocatable   * kernel, otherwise it will be moved to PHYSICAL_START   */ -#if defined(CONFIG_PPC_BOOK3E_64) -	tovirt(r26,r26)		/* on booke, we already run at PAGE_OFFSET */ -#endif  	lwz	r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)  	cmplwi	cr0,r7,1  	bne	3f @@ -756,9 +778,15 @@ _GLOBAL(pmac_secondary_start)  	sync  	slbia -	/* get TOC pointer (real address) */ +	/* Branch to our PAGE_OFFSET address */ +	bcl	20,31,$+4 +1:	mflr	r11 +	addi	r11,r11,(2f - 1b) +	tovirt(r11, r11) +	mtctr	r11 +	bctr +2:  	bl	relative_toc -	tovirt(r2,r2)  	/* Copy some CPU settings from CPU 0 */  	bl	__restore_cpu_ppc970 @@ -817,7 +845,7 @@ __secondary_start:  	 * can turn it on below. This is a call to C, which is OK, we're still  	 * running on the emergency stack.  	 */ -	bl	early_setup_secondary +	bl	CFUNC(early_setup_secondary)  	/*  	 * The primary has initialized our kernel stack for us in the paca, grab @@ -856,7 +884,7 @@ start_secondary_prolog:  	LOAD_PACA_TOC()  	li	r3,0  	std	r3,0(r1)		/* Zero the stack frame pointer	*/ -	bl	start_secondary +	bl	CFUNC(start_secondary)  	b	.  /*   * Reset stack pointer and call start_secondary @@ -867,7 +895,7 @@ _GLOBAL(start_secondary_resume)  	ld	r1,PACAKSAVE(r13)	/* Reload kernel stack pointer */  	li	r3,0  	std	r3,0(r1)		/* Zero the stack frame pointer	*/ -	bl	start_secondary +	bl	CFUNC(start_secondary)  	b	.  #endif @@ -897,10 +925,15 @@ SYM_FUNC_END(enable_64b_mode)   * TOC in -mcmodel=medium mode. After we relocate to 0 but before   * the MMU is on we need our TOC to be a virtual address otherwise   * these pointers will be real addresses which may get stored and - * accessed later with the MMU on. We use tovirt() at the call - * sites to handle this. + * accessed later with the MMU on. We branch to the virtual address + * while still in real mode then call relative_toc again to handle + * this.   */  _GLOBAL(relative_toc) +#ifdef CONFIG_PPC_KERNEL_PCREL +	tdnei	r2,-1 +	blr +#else  	mflr	r0  	bcl	20,31,$+4  0:	mflr	r11 @@ -911,15 +944,15 @@ _GLOBAL(relative_toc)  .balign 8  p_toc:	.8byte	.TOC. - 0b +#endif  /*   * This is where the main kernel code starts.   */  __REF  start_here_multiplatform: -	/* set up the TOC */ -	bl      relative_toc -	tovirt(r2,r2) +	/* Adjust TOC for moved kernel. Could adjust when moving it instead. */ +	bl	relative_toc  	/* Clear out the BSS. It may have been done in prom_init,  	 * already but that's irrelevant since prom_init will soon @@ -972,7 +1005,7 @@ start_here_multiplatform:  	 */  #ifdef CONFIG_KASAN -	bl	kasan_early_init +	bl	CFUNC(kasan_early_init)  #endif  	/* Restore parameters passed from prom_init/kexec */  	mr	r3,r31 @@ -1005,7 +1038,7 @@ start_here_common:  	stb	r0,PACAIRQHAPPENED(r13)  	/* Generic kernel entry */ -	bl	start_kernel +	bl	CFUNC(start_kernel)  	/* Not reached */  0:	trap diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 37d43c172676..b6b5b01a173c 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -5,6 +5,7 @@  #include <asm/ptrace.h>	/* for STACK_FRAME_REGS_MARKER */  #include <asm/kvm_asm.h>  #include <asm/kvm_booke_hv_asm.h> +#include <asm/thread_info.h>	/* for THREAD_SHIFT */  #ifdef __ASSEMBLY__ diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index b9a725abc596..b1c0418b25c8 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -107,19 +107,11 @@ static struct ctl_table powersave_nap_ctl_table[] = {  	},  	{}  }; -static struct ctl_table powersave_nap_sysctl_root[] = { -	{ -		.procname	= "kernel", -		.mode		= 0555, -		.child		= powersave_nap_ctl_table, -	}, -	{} -};  static int __init  register_powersave_nap_sysctl(void)  { -	register_sysctl_table(powersave_nap_sysctl_root); +	register_sysctl("kernel", powersave_nap_ctl_table);  	return 0;  } diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 0ec1581619db..e34c72285b4e 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -95,7 +95,7 @@ static notrace void booke_load_dbcr0(void)  #endif  } -static void check_return_regs_valid(struct pt_regs *regs) +static notrace void check_return_regs_valid(struct pt_regs *regs)  {  #ifdef CONFIG_PPC_BOOK3S_64  	unsigned long trap, srr0, srr1; diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index fccc34489add..bd863702d812 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -101,12 +101,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	 * state of kernel code.  	 */  	SANITIZE_SYSCALL_GPRS() -	bl	system_call_exception +	bl	CFUNC(system_call_exception)  .Lsyscall_vectored_\name\()_exit:  	addi	r4,r1,STACK_INT_FRAME_REGS  	li	r5,1 /* scv */ -	bl	syscall_exit_prepare +	bl	CFUNC(syscall_exit_prepare)  	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */  .Lsyscall_vectored_\name\()_rst_start:  	lbz	r11,PACAIRQHAPPENED(r13) @@ -185,7 +185,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart)  	addi	r4,r1,STACK_INT_FRAME_REGS  	li	r11,IRQS_ALL_DISABLED  	stb	r11,PACAIRQSOFTMASK(r13) -	bl	syscall_exit_restart +	bl	CFUNC(syscall_exit_restart)  	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */  	b	.Lsyscall_vectored_\name\()_rst_start  1: @@ -286,12 +286,12 @@ END_BTB_FLUSH_SECTION  	 * state of kernel code.  	 */  	SANITIZE_SYSCALL_GPRS() -	bl	system_call_exception +	bl	CFUNC(system_call_exception)  .Lsyscall_exit:  	addi	r4,r1,STACK_INT_FRAME_REGS  	li	r5,0 /* !scv */ -	bl	syscall_exit_prepare +	bl	CFUNC(syscall_exit_prepare)  	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */  #ifdef CONFIG_PPC_BOOK3S  .Lsyscall_rst_start: @@ -372,7 +372,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_restart)  	addi	r4,r1,STACK_INT_FRAME_REGS  	li	r11,IRQS_ALL_DISABLED  	stb	r11,PACAIRQSOFTMASK(r13) -	bl	syscall_exit_restart +	bl	CFUNC(syscall_exit_restart)  	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */  	b	.Lsyscall_rst_start  1: @@ -401,7 +401,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr)  	li	r3,0 /* 0 return value, no EMULATE_STACK_STORE */  	bne+	.Lfast_kernel_interrupt_return_srr  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	unrecoverable_exception +	bl	CFUNC(unrecoverable_exception)  	b	. /* should not get here */  #else  	bne	.Lfast_user_interrupt_return_srr @@ -419,7 +419,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\())  interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */  _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	interrupt_exit_user_prepare +	bl	CFUNC(interrupt_exit_user_prepare)  #ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS  	cmpdi	r3,0  	bne-	.Lrestore_nvgprs_\srr @@ -523,7 +523,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)  	addi	r3,r1,STACK_INT_FRAME_REGS  	li	r11,IRQS_ALL_DISABLED  	stb	r11,PACAIRQSOFTMASK(r13) -	bl	interrupt_exit_user_restart +	bl	CFUNC(interrupt_exit_user_restart)  	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */  	b	.Linterrupt_return_\srr\()_user_rst_start  1: @@ -536,7 +536,7 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr  interrupt_return_\srr\()_kernel:  _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)  	addi	r3,r1,STACK_INT_FRAME_REGS -	bl	interrupt_exit_kernel_prepare +	bl	CFUNC(interrupt_exit_kernel_prepare)  	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */  .Linterrupt_return_\srr\()_kernel_rst_start: @@ -705,7 +705,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart)  	addi	r3,r1,STACK_INT_FRAME_REGS  	li	r11,IRQS_ALL_DISABLED  	stb	r11,PACAIRQSOFTMASK(r13) -	bl	interrupt_exit_kernel_restart +	bl	CFUNC(interrupt_exit_kernel_restart)  	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */  	b	.Linterrupt_return_\srr\()_kernel_rst_start  1: @@ -727,21 +727,20 @@ DEFINE_FIXED_SYMBOL(__end_soft_masked, text)  #ifdef CONFIG_PPC_BOOK3S  _GLOBAL(ret_from_fork_scv) -	bl	schedule_tail -	REST_NVGPRS(r1) +	bl	CFUNC(schedule_tail) +	HANDLER_RESTORE_NVGPRS()  	li	r3,0	/* fork() return value */  	b	.Lsyscall_vectored_common_exit  #endif  _GLOBAL(ret_from_fork) -	bl	schedule_tail -	REST_NVGPRS(r1) +	bl	CFUNC(schedule_tail) +	HANDLER_RESTORE_NVGPRS()  	li	r3,0	/* fork() return value */  	b	.Lsyscall_exit -_GLOBAL(ret_from_kernel_thread) -	bl	schedule_tail -	REST_NVGPRS(r1) +_GLOBAL(ret_from_kernel_user_thread) +	bl	CFUNC(schedule_tail)  	mtctr	r14  	mr	r3,r15  #ifdef CONFIG_PPC64_ELF_ABI_V2 @@ -749,4 +748,25 @@ _GLOBAL(ret_from_kernel_thread)  #endif  	bctrl  	li	r3,0 +	/* +	 * It does not matter whether this returns via the scv or sc path +	 * because it returns as execve() and therefore has no calling ABI +	 * (i.e., it sets registers according to the exec()ed entry point). +	 */  	b	.Lsyscall_exit + +_GLOBAL(start_kernel_thread) +	bl	CFUNC(schedule_tail) +	mtctr	r14 +	mr	r3,r15 +#ifdef CONFIG_PPC64_ELF_ABI_V2 +	mr	r12,r14 +#endif +	bctrl +	/* +	 * This must not return. We actually want to BUG here, not WARN, +	 * because BUG will exit the process which is what the kernel thread +	 * should have done, which may give some hope of continuing. +	 */ +100:	trap +	EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ee95937bdaf1..0089dd49b4cb 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -35,6 +35,7 @@  #include <asm/vio.h>  #include <asm/tce.h>  #include <asm/mmu_context.h> +#include <asm/ppc-pci.h>  #define DBG(...) @@ -1086,7 +1087,7 @@ void iommu_tce_kill(struct iommu_table *tbl,  }  EXPORT_SYMBOL_GPL(iommu_tce_kill); -int iommu_take_ownership(struct iommu_table *tbl) +static int iommu_take_ownership(struct iommu_table *tbl)  {  	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;  	int ret = 0; @@ -1118,9 +1119,8 @@ int iommu_take_ownership(struct iommu_table *tbl)  	return ret;  } -EXPORT_SYMBOL_GPL(iommu_take_ownership); -void iommu_release_ownership(struct iommu_table *tbl) +static void iommu_release_ownership(struct iommu_table *tbl)  {  	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; @@ -1137,7 +1137,6 @@ void iommu_release_ownership(struct iommu_table *tbl)  		spin_unlock(&tbl->pools[i].lock);  	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);  } -EXPORT_SYMBOL_GPL(iommu_release_ownership);  int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)  { @@ -1158,8 +1157,14 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)  	pr_debug("%s: Adding %s to iommu group %d\n",  		 __func__, dev_name(dev),  iommu_group_id(table_group->group)); - -	return iommu_group_add_device(table_group->group, dev); +	/* +	 * This is still not adding devices via the IOMMU bus notifier because +	 * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls +	 * pcibios_scan_phb() first (and this guy adds devices and triggers +	 * the notifier) and only then it calls pci_bus_add_devices() which +	 * configures DMA for buses which also creates PEs and IOMMU groups. +	 */ +	return iommu_probe_device(dev);  }  EXPORT_SYMBOL_GPL(iommu_add_device); @@ -1179,4 +1184,233 @@ void iommu_del_device(struct device *dev)  	iommu_group_remove_device(dev);  }  EXPORT_SYMBOL_GPL(iommu_del_device); + +/* + * A simple iommu_table_group_ops which only allows reusing the existing + * iommu_table. This handles VFIO for POWER7 or the nested KVM. + * The ops does not allow creating windows and only allows reusing the existing + * one if it matches table_group->tce32_start/tce32_size/page_shift. + */ +static unsigned long spapr_tce_get_table_size(__u32 page_shift, +					      __u64 window_size, __u32 levels) +{ +	unsigned long size; + +	if (levels > 1) +		return ~0U; +	size = window_size >> (page_shift - 3); +	return size; +} + +static long spapr_tce_create_table(struct iommu_table_group *table_group, int num, +				   __u32 page_shift, __u64 window_size, __u32 levels, +				   struct iommu_table **ptbl) +{ +	struct iommu_table *tbl = table_group->tables[0]; + +	if (num > 0) +		return -EPERM; + +	if (tbl->it_page_shift != page_shift || +	    tbl->it_size != (window_size >> page_shift) || +	    tbl->it_indirect_levels != levels - 1) +		return -EINVAL; + +	*ptbl = iommu_tce_table_get(tbl); +	return 0; +} + +static long spapr_tce_set_window(struct iommu_table_group *table_group, +				 int num, struct iommu_table *tbl) +{ +	return tbl == table_group->tables[num] ? 0 : -EPERM; +} + +static long spapr_tce_unset_window(struct iommu_table_group *table_group, int num) +{ +	return 0; +} + +static long spapr_tce_take_ownership(struct iommu_table_group *table_group) +{ +	int i, j, rc = 0; + +	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { +		struct iommu_table *tbl = table_group->tables[i]; + +		if (!tbl || !tbl->it_map) +			continue; + +		rc = iommu_take_ownership(tbl); +		if (!rc) +			continue; + +		for (j = 0; j < i; ++j) +			iommu_release_ownership(table_group->tables[j]); +		return rc; +	} +	return 0; +} + +static void spapr_tce_release_ownership(struct iommu_table_group *table_group) +{ +	int i; + +	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { +		struct iommu_table *tbl = table_group->tables[i]; + +		if (!tbl) +			continue; + +		iommu_table_clear(tbl); +		if (tbl->it_map) +			iommu_release_ownership(tbl); +	} +} + +struct iommu_table_group_ops spapr_tce_table_group_ops = { +	.get_table_size = spapr_tce_get_table_size, +	.create_table = spapr_tce_create_table, +	.set_window = spapr_tce_set_window, +	.unset_window = spapr_tce_unset_window, +	.take_ownership = spapr_tce_take_ownership, +	.release_ownership = spapr_tce_release_ownership, +}; + +/* + * A simple iommu_ops to allow less cruft in generic VFIO code. + */ +static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom, +					       struct device *dev) +{ +	struct iommu_group *grp = iommu_group_get(dev); +	struct iommu_table_group *table_group; +	int ret = -EINVAL; + +	if (!grp) +		return -ENODEV; + +	table_group = iommu_group_get_iommudata(grp); +	ret = table_group->ops->take_ownership(table_group); +	iommu_group_put(grp); + +	return ret; +} + +static void spapr_tce_blocking_iommu_set_platform_dma(struct device *dev) +{ +	struct iommu_group *grp = iommu_group_get(dev); +	struct iommu_table_group *table_group; + +	table_group = iommu_group_get_iommudata(grp); +	table_group->ops->release_ownership(table_group); +} + +static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = { +	.attach_dev = spapr_tce_blocking_iommu_attach_dev, +}; + +static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) +{ +	switch (cap) { +	case IOMMU_CAP_CACHE_COHERENCY: +		return true; +	default: +		break; +	} + +	return false; +} + +static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type) +{ +	struct iommu_domain *dom; + +	if (type != IOMMU_DOMAIN_BLOCKED) +		return NULL; + +	dom = kzalloc(sizeof(*dom), GFP_KERNEL); +	if (!dom) +		return NULL; + +	dom->ops = &spapr_tce_blocking_domain_ops; + +	return dom; +} + +static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) +{ +	struct pci_dev *pdev; +	struct pci_controller *hose; + +	if (!dev_is_pci(dev)) +		return ERR_PTR(-EPERM); + +	pdev = to_pci_dev(dev); +	hose = pdev->bus->sysdata; + +	return &hose->iommu; +} + +static void spapr_tce_iommu_release_device(struct device *dev) +{ +} + +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) +{ +	struct pci_controller *hose; +	struct pci_dev *pdev; + +	pdev = to_pci_dev(dev); +	hose = pdev->bus->sysdata; + +	if (!hose->controller_ops.device_group) +		return ERR_PTR(-ENOENT); + +	return hose->controller_ops.device_group(hose, pdev); +} + +static const struct iommu_ops spapr_tce_iommu_ops = { +	.capable = spapr_tce_iommu_capable, +	.domain_alloc = spapr_tce_iommu_domain_alloc, +	.probe_device = spapr_tce_iommu_probe_device, +	.release_device = spapr_tce_iommu_release_device, +	.device_group = spapr_tce_iommu_device_group, +	.set_platform_dma_ops = spapr_tce_blocking_iommu_set_platform_dma, +}; + +static struct attribute *spapr_tce_iommu_attrs[] = { +	NULL, +}; + +static struct attribute_group spapr_tce_iommu_group = { +	.name = "spapr-tce-iommu", +	.attrs = spapr_tce_iommu_attrs, +}; + +static const struct attribute_group *spapr_tce_iommu_groups[] = { +	&spapr_tce_iommu_group, +	NULL, +}; + +/* + * This registers IOMMU devices of PHBs. This needs to happen + * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and + * before subsys_initcall(iommu_subsys_init). + */ +static int __init spapr_tce_setup_phb_iommus_initcall(void) +{ +	struct pci_controller *hose; + +	list_for_each_entry(hose, &hose_list, list_node) { +		iommu_device_sysfs_add(&hose->iommu, hose->parent, +				       spapr_tce_iommu_groups, "iommu-phb%04x", +				       hose->global_number); +		iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops, +				      hose->parent); +	} +	return 0; +} +postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall); +  #endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c9535f2760b5..6f7d4edaa0bc 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -206,7 +206,11 @@ static __always_inline void call_do_softirq(const void *sp)  	asm volatile (  		 PPC_STLU "	%%r1, %[offset](%[sp])	;"  		"mr		%%r1, %[sp]		;" +#ifdef CONFIG_PPC_KERNEL_PCREL +		"bl		%[callee]@notoc		;" +#else  		"bl		%[callee]		;" +#endif  		 PPC_LL "	%%r1, 0(%%r1)		;"  		 : // Outputs  		 : // Inputs @@ -259,7 +263,11 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)  		 PPC_STLU "	%%r1, %[offset](%[sp])	;"  		"mr		%%r4, %%r1		;"  		"mr		%%r1, %[sp]		;" +#ifdef CONFIG_PPC_KERNEL_PCREL +		"bl		%[callee]@notoc		;" +#else  		"bl		%[callee]		;" +#endif  		 PPC_LL "	%%r1, 0(%%r1)		;"  		 : // Outputs  		   "+r" (r3) diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index c788c55512ed..938e66829eae 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -348,13 +348,12 @@ EXPORT_SYMBOL(arch_local_irq_restore);   * already the case when ppc_md.power_save is called). The function   * will return whether to enter power save or just return.   * - * In the former case, it will have notified lockdep of interrupts - * being re-enabled and generally sanitized the lazy irq state, - * and in the latter case it will leave with interrupts hard + * In the former case, it will have generally sanitized the lazy irq + * state, and in the latter case it will leave with interrupts hard   * disabled and marked as such, so the local_irq_enable() call   * in arch_cpu_idle() will properly re-enable everything.   */ -bool prep_irq_for_idle(void) +__cpuidle bool prep_irq_for_idle(void)  {  	/*  	 * First we need to hard disable to ensure no interrupt @@ -370,9 +369,6 @@ bool prep_irq_for_idle(void)  	if (lazy_irq_pending())  		return false; -	/* Tell lockdep we are about to re-enable */ -	trace_hardirqs_on(); -  	/*  	 * Mark interrupts as soft-enabled and clear the  	 * PACA_IRQ_HARD_DIS from the pending mask since we diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index dc746611ebc0..85bdd7d3652f 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -55,80 +55,49 @@ static void remap_isa_base(phys_addr_t pa, unsigned long size)  	}  } -static void pci_process_ISA_OF_ranges(struct device_node *isa_node, -				      unsigned long phb_io_base_phys) +static int process_ISA_OF_ranges(struct device_node *isa_node, +				 unsigned long phb_io_base_phys)  { -	/* We should get some saner parsing here and remove these structs */ -	struct pci_address { -		u32 a_hi; -		u32 a_mid; -		u32 a_lo; -	}; - -	struct isa_address { -		u32 a_hi; -		u32 a_lo; -	}; - -	struct isa_range { -		struct isa_address isa_addr; -		struct pci_address pci_addr; -		unsigned int size; -	}; - -	const struct isa_range *range; -	unsigned long pci_addr; -	unsigned int isa_addr;  	unsigned int size; -	int rlen = 0; +	struct of_range_parser parser; +	struct of_range range; -	range = of_get_property(isa_node, "ranges", &rlen); -	if (range == NULL || (rlen < sizeof(struct isa_range))) +	if (of_range_parser_init(&parser, isa_node))  		goto inval_range; -	/* From "ISA Binding to 1275" -	 * The ranges property is laid out as an array of elements, -	 * each of which comprises: -	 *   cells 0 - 1:	an ISA address -	 *   cells 2 - 4:	a PCI address -	 *			(size depending on dev->n_addr_cells) -	 *   cell 5:		the size of the range -	 */ -	if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) { -		range++; -		rlen -= sizeof(struct isa_range); -		if (rlen < sizeof(struct isa_range)) -			goto inval_range; -	} -	if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) -		goto inval_range; +	for_each_of_range(&parser, &range) { +		if ((range.flags & ISA_SPACE_MASK) != ISA_SPACE_IO) +			continue; -	isa_addr = range->isa_addr.a_lo; -	pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | -		range->pci_addr.a_lo; +		if (range.cpu_addr == OF_BAD_ADDR) { +			pr_err("ISA: Bad CPU mapping: %s\n", __func__); +			return -EINVAL; +		} -	/* Assume these are both zero. Note: We could fix that and -	 * do a proper parsing instead ... oh well, that will do for -	 * now as nobody uses fancy mappings for ISA bridges -	 */ -	if ((pci_addr != 0) || (isa_addr != 0)) { -		printk(KERN_ERR "unexpected isa to pci mapping: %s\n", -		       __func__); -		return; -	} +		/* We need page alignment */ +		if ((range.bus_addr & ~PAGE_MASK) || (range.cpu_addr & ~PAGE_MASK)) { +			pr_warn("ISA: bridge %pOF has non aligned IO range\n", isa_node); +			return -EINVAL; +		} -	/* Align size and make sure it's cropped to 64K */ -	size = PAGE_ALIGN(range->size); -	if (size > 0x10000) -		size = 0x10000; +		/* Align size and make sure it's cropped to 64K */ +		size = PAGE_ALIGN(range.size); +		if (size > 0x10000) +			size = 0x10000; -	remap_isa_base(phb_io_base_phys, size); -	return; +		if (!phb_io_base_phys) +			phb_io_base_phys = range.cpu_addr; + +		remap_isa_base(phb_io_base_phys, size); +		return 0; +	}  inval_range: -	printk(KERN_ERR "no ISA IO ranges or unexpected isa range, " -	       "mapping 64k\n"); -	remap_isa_base(phb_io_base_phys, 0x10000); +	if (!phb_io_base_phys) { +		pr_err("no ISA IO ranges or unexpected isa range, mapping 64k\n"); +		remap_isa_base(phb_io_base_phys, 0x10000); +	} +	return 0;  } @@ -170,7 +139,7 @@ void __init isa_bridge_find_early(struct pci_controller *hose)  	isa_bridge_devnode = np;  	/* Now parse the "ranges" property and setup the ISA mapping */ -	pci_process_ISA_OF_ranges(np, hose->io_base_phys); +	process_ISA_OF_ranges(np, hose->io_base_phys);  	/* Set the global ISA io base to indicate we have an ISA bridge */  	isa_io_base = ISA_IO_BASE; @@ -186,75 +155,15 @@ void __init isa_bridge_find_early(struct pci_controller *hose)   */  void __init isa_bridge_init_non_pci(struct device_node *np)  { -	const __be32 *ranges, *pbasep = NULL; -	int rlen, i, rs; -	u32 na, ns, pna; -	u64 cbase, pbase, size = 0; +	int ret;  	/* If we already have an ISA bridge, bail off */  	if (isa_bridge_devnode != NULL)  		return; -	pna = of_n_addr_cells(np); -	if (of_property_read_u32(np, "#address-cells", &na) || -	    of_property_read_u32(np, "#size-cells", &ns)) { -		pr_warn("ISA: Non-PCI bridge %pOF is missing address format\n", -			np); -		return; -	} - -	/* Check it's a supported address format */ -	if (na != 2 || ns != 1) { -		pr_warn("ISA: Non-PCI bridge %pOF has unsupported address format\n", -			np); -		return; -	} -	rs = na + ns + pna; - -	/* Grab the ranges property */ -	ranges = of_get_property(np, "ranges", &rlen); -	if (ranges == NULL || rlen < rs) { -		pr_warn("ISA: Non-PCI bridge %pOF has absent or invalid ranges\n", -			np); -		return; -	} - -	/* Parse it. We are only looking for IO space */ -	for (i = 0; (i + rs - 1) < rlen; i += rs) { -		if (be32_to_cpup(ranges + i) != 1) -			continue; -		cbase = be32_to_cpup(ranges + i + 1); -		size = of_read_number(ranges + i + na + pna, ns); -		pbasep = ranges + i + na; -		break; -	} - -	/* Got something ? */ -	if (!size || !pbasep) { -		pr_warn("ISA: Non-PCI bridge %pOF has no usable IO range\n", -			np); +	ret = process_ISA_OF_ranges(np, 0); +	if (ret)  		return; -	} - -	/* Align size and make sure it's cropped to 64K */ -	size = PAGE_ALIGN(size); -	if (size > 0x10000) -		size = 0x10000; - -	/* Map pbase */ -	pbase = of_translate_address(np, pbasep); -	if (pbase == OF_BAD_ADDR) { -		pr_warn("ISA: Non-PCI bridge %pOF failed to translate IO base\n", -			np); -		return; -	} - -	/* We need page alignment */ -	if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) { -		pr_warn("ISA: Non-PCI bridge %pOF has non aligned IO range\n", -			np); -		return; -	}  	/* Got it */  	isa_bridge_devnode = np; @@ -263,7 +172,6 @@ void __init isa_bridge_init_non_pci(struct device_node *np)  	 * and map it  	 */  	isa_io_base = ISA_IO_BASE; -	remap_isa_base(pbase, size);  	pr_debug("ISA: Non-PCI bridge is %pOF\n", np);  } @@ -282,7 +190,7 @@ static void isa_bridge_find_late(struct pci_dev *pdev,  	isa_bridge_pcidev = pdev;  	/* Now parse the "ranges" property and setup the ISA mapping */ -	pci_process_ISA_OF_ranges(devnode, hose->io_base_phys); +	process_ISA_OF_ranges(devnode, hose->io_base_phys);  	/* Set the global ISA io base to indicate we have an ISA bridge */  	isa_io_base = ISA_IO_BASE; diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index f048c424c525..c9ad12461d44 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -171,15 +171,15 @@ static int __init add_legacy_soc_port(struct device_node *np,  	/* We only support ports that have a clock frequency properly  	 * encoded in the device-tree.  	 */ -	if (of_get_property(np, "clock-frequency", NULL) == NULL) +	if (!of_property_present(np, "clock-frequency"))  		return -1;  	/* if reg-offset don't try to use it */ -	if ((of_get_property(np, "reg-offset", NULL) != NULL)) +	if (of_property_present(np, "reg-offset"))  		return -1;  	/* if rtas uses this device, don't try to use it as well */ -	if (of_get_property(np, "used-by-rtas", NULL) != NULL) +	if (of_property_read_bool(np, "used-by-rtas"))  		return -1;  	/* Get the address */ @@ -237,7 +237,7 @@ static int __init add_legacy_isa_port(struct device_node *np,  	 * Note: Don't even try on P8 lpc, we know it's not directly mapped  	 */  	if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") || -	    of_get_property(isa_brg, "ranges", NULL)) { +	    of_property_present(isa_brg, "ranges")) {  		taddr = of_translate_address(np, reg);  		if (taddr == OF_BAD_ADDR)  			taddr = 0; @@ -268,7 +268,7 @@ static int __init add_legacy_pci_port(struct device_node *np,  	 * compatible UARTs on PCI need all sort of quirks (port offsets  	 * etc...) that this code doesn't know about  	 */ -	if (of_get_property(np, "clock-frequency", NULL) == NULL) +	if (!of_property_present(np, "clock-frequency"))  		return -1;  	/* Get the PCI address. Assume BAR 0 */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index c39c07a4c06e..2c9ac70aaf0c 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -432,7 +432,7 @@ _GLOBAL(kexec_sequence)  1:  	/* copy dest pages, flush whole dest image */  	mr	r3,r29 -	bl	kexec_copy_flush	/* (image) */ +	bl	CFUNC(kexec_copy_flush)	/* (image) */  	/* turn off mmu now if not done earlier */  	cmpdi	r26,0 diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index ea6536171778..816a63fd71fb 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -163,8 +163,7 @@ static uint32_t do_plt_call(void *location,  	pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location);  	/* Init, or core PLT? */ -	if (location >= mod->core_layout.base -	    && location < mod->core_layout.base + mod->core_layout.size) +	if (within_module_core((unsigned long)location, mod))  		entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;  	else  		entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; @@ -322,14 +321,14 @@ notrace int module_trampoline_target(struct module *mod, unsigned long addr,  int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)  { -	module->arch.tramp = do_plt_call(module->core_layout.base, +	module->arch.tramp = do_plt_call(module->mem[MOD_TEXT].base,  					 (unsigned long)ftrace_caller,  					 sechdrs, module);  	if (!module->arch.tramp)  		return -ENOENT;  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -	module->arch.tramp_regs = do_plt_call(module->core_layout.base, +	module->arch.tramp_regs = do_plt_call(module->mem[MOD_TEXT].base,  					      (unsigned long)ftrace_regs_caller,  					      sechdrs, module);  	if (!module->arch.tramp_regs) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 2ac78d207f77..92570289ce08 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -101,32 +101,45 @@ static unsigned long stub_func_addr(func_desc_t func)  /* Like PPC32, we need little trampolines to do > 24-bit jumps (into     the kernel itself).  But on PPC64, these need to be used for every     jump, actually, to reset r2 (TOC+0x8000). */ -struct ppc64_stub_entry -{ -	/* 28 byte jump instruction sequence (7 instructions). We only -	 * need 6 instructions on ABIv2 but we always allocate 7 so -	 * so we don't have to modify the trampoline load instruction. */ +struct ppc64_stub_entry { +	/* +	 * 28 byte jump instruction sequence (7 instructions) that can +	 * hold ppc64_stub_insns or stub_insns. Must be 8-byte aligned +	 * with PCREL kernels that use prefix instructions in the stub. +	 */  	u32 jump[7];  	/* Used by ftrace to identify stubs */  	u32 magic;  	/* Data for the above code */  	func_desc_t funcdata; +} __aligned(8); + +struct ppc64_got_entry { +	u64 addr;  };  /*   * PPC64 uses 24 bit jumps, but we need to jump into other modules or   * the kernel which may be further.  So we jump to a stub.   * - * For ELFv1 we need to use this to set up the new r2 value (aka TOC - * pointer).  For ELFv2 it's the callee's responsibility to set up the - * new r2, but for both we need to save the old r2. + * Target address and TOC are loaded from function descriptor in the + * ppc64_stub_entry. + * + * r12 is used to generate the target address, which is required for the + * ELFv2 global entry point calling convention.   * - * We could simply patch the new r2 value and function pointer into - * the stub, but it's significantly shorter to put these values at the - * end of the stub code, and patch the stub address (32-bits relative - * to the TOC ptr, r2) into the stub. + * TOC handling: + * - PCREL does not have a TOC. + * - ELFv2 non-PCREL just has to save r2, the callee is responsible for + *   setting its own TOC pointer at the global entry address. + * - ELFv1 must load the new TOC pointer from the function descriptor.   */  static u32 ppc64_stub_insns[] = { +#ifdef CONFIG_PPC_KERNEL_PCREL +	/* pld r12,addr */ +	PPC_PREFIX_8LS | __PPC_PRFX_R(1), +	PPC_INST_PLD | ___PPC_RT(_R12), +#else  	PPC_RAW_ADDIS(_R11, _R2, 0),  	PPC_RAW_ADDI(_R11, _R11, 0),  	/* Save current r2 value in magic place on the stack. */ @@ -136,13 +149,17 @@ static u32 ppc64_stub_insns[] = {  	/* Set up new r2 from function descriptor */  	PPC_RAW_LD(_R2, _R11, 40),  #endif +#endif  	PPC_RAW_MTCTR(_R12),  	PPC_RAW_BCTR(),  }; -/* Count how many different 24-bit relocations (different symbol, -   different addend) */ -static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) +/* + * Count how many different r_type relocations (different symbol, + * different addend). + */ +static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num, +				 unsigned long r_type)  {  	unsigned int i, r_info, r_addend, _count_relocs; @@ -151,8 +168,8 @@ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)  	r_info = 0;  	r_addend = 0;  	for (i = 0; i < num; i++) -		/* Only count 24-bit relocs, others don't need stubs */ -		if (ELF64_R_TYPE(rela[i].r_info) == R_PPC_REL24 && +		/* Only count r_type relocs, others don't need stubs */ +		if (ELF64_R_TYPE(rela[i].r_info) == r_type &&  		    (r_info != ELF64_R_SYM(rela[i].r_info) ||  		     r_addend != rela[i].r_addend)) {  			_count_relocs++; @@ -213,7 +230,14 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,  			relocs += count_relocs((void *)sechdrs[i].sh_addr,  					       sechdrs[i].sh_size -					       / sizeof(Elf64_Rela)); +					       / sizeof(Elf64_Rela), +					       R_PPC_REL24); +#ifdef CONFIG_PPC_KERNEL_PCREL +			relocs += count_relocs((void *)sechdrs[i].sh_addr, +					       sechdrs[i].sh_size +					       / sizeof(Elf64_Rela), +					       R_PPC64_REL24_NOTOC); +#endif  		}  	} @@ -230,6 +254,95 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,  	return relocs * sizeof(struct ppc64_stub_entry);  } +#ifdef CONFIG_PPC_KERNEL_PCREL +static int count_pcpu_relocs(const Elf64_Shdr *sechdrs, +			     const Elf64_Rela *rela, unsigned int num, +			     unsigned int symindex, unsigned int pcpu) +{ +	unsigned int i, r_info, r_addend, _count_relocs; + +	_count_relocs = 0; +	r_info = 0; +	r_addend = 0; + +	for (i = 0; i < num; i++) { +		Elf64_Sym *sym; + +		/* This is the symbol it is referring to */ +		sym = (Elf64_Sym *)sechdrs[symindex].sh_addr +			+ ELF64_R_SYM(rela[i].r_info); + +		if (sym->st_shndx == pcpu && +		    (r_info != ELF64_R_SYM(rela[i].r_info) || +		     r_addend != rela[i].r_addend)) { +			_count_relocs++; +			r_info = ELF64_R_SYM(rela[i].r_info); +			r_addend = rela[i].r_addend; +		} +	} + +	return _count_relocs; +} + +/* Get size of potential GOT required. */ +static unsigned long get_got_size(const Elf64_Ehdr *hdr, +				  const Elf64_Shdr *sechdrs, +				  struct module *me) +{ +	/* One extra reloc so it's always 0-addr terminated */ +	unsigned long relocs = 1; +	unsigned int i, symindex = 0; + +	for (i = 1; i < hdr->e_shnum; i++) { +		if (sechdrs[i].sh_type == SHT_SYMTAB) { +			symindex = i; +			break; +		} +	} +	WARN_ON_ONCE(!symindex); + +	/* Every relocated section... */ +	for (i = 1; i < hdr->e_shnum; i++) { +		if (sechdrs[i].sh_type == SHT_RELA) { +			pr_debug("Found relocations in section %u\n", i); +			pr_debug("Ptr: %p.  Number: %llu\n", (void *)sechdrs[i].sh_addr, +				 sechdrs[i].sh_size / sizeof(Elf64_Rela)); + +			/* +			 * Sort the relocation information based on a symbol and +			 * addend key. This is a stable O(n*log n) complexity +			 * algorithm but it will reduce the complexity of +			 * count_relocs() to linear complexity O(n) +			 */ +			sort((void *)sechdrs[i].sh_addr, +			     sechdrs[i].sh_size / sizeof(Elf64_Rela), +			     sizeof(Elf64_Rela), relacmp, NULL); + +			relocs += count_relocs((void *)sechdrs[i].sh_addr, +					       sechdrs[i].sh_size +					       / sizeof(Elf64_Rela), +					       R_PPC64_GOT_PCREL34); + +			/* +			 * Percpu data access typically gets linked with +			 * REL34 relocations, but the percpu section gets +			 * moved at load time and requires that to be +			 * converted to GOT linkage. +			 */ +			if (IS_ENABLED(CONFIG_SMP) && symindex) +				relocs += count_pcpu_relocs(sechdrs, +						(void *)sechdrs[i].sh_addr, +					       sechdrs[i].sh_size +					       / sizeof(Elf64_Rela), +					       symindex, me->arch.pcpu_section); +		} +	} + +	pr_debug("Looks like a total of %lu GOT entries, max\n", relocs); +	return relocs * sizeof(struct ppc64_got_entry); +} +#else /* CONFIG_PPC_KERNEL_PCREL */ +  /* Still needed for ELFv2, for .TOC. */  static void dedotify_versions(struct modversion_info *vers,  			      unsigned long size) @@ -279,6 +392,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,  	}  	return NULL;  } +#endif /* CONFIG_PPC_KERNEL_PCREL */  bool module_init_section(const char *name)  { @@ -297,6 +411,15 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,  	for (i = 1; i < hdr->e_shnum; i++) {  		if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)  			me->arch.stubs_section = i; +#ifdef CONFIG_PPC_KERNEL_PCREL +		else if (strcmp(secstrings + sechdrs[i].sh_name, ".data..percpu") == 0) +			me->arch.pcpu_section = i; +		else if (strcmp(secstrings + sechdrs[i].sh_name, ".mygot") == 0) { +			me->arch.got_section = i; +			if (sechdrs[i].sh_addralign < 8) +				sechdrs[i].sh_addralign = 8; +		} +#else  		else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) {  			me->arch.toc_section = i;  			if (sechdrs[i].sh_addralign < 8) @@ -311,6 +434,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,  				 sechdrs[i].sh_size / sizeof(Elf64_Sym),  				 (void *)hdr  				 + sechdrs[sechdrs[i].sh_link].sh_offset); +#endif  	}  	if (!me->arch.stubs_section) { @@ -318,26 +442,47 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,  		return -ENOEXEC;  	} +#ifdef CONFIG_PPC_KERNEL_PCREL +	if (!me->arch.got_section) { +		pr_err("%s: doesn't contain .mygot.\n", me->name); +		return -ENOEXEC; +	} + +	/* Override the got size */ +	sechdrs[me->arch.got_section].sh_size = get_got_size(hdr, sechdrs, me); +#else  	/* If we don't have a .toc, just use .stubs.  We need to set r2  	   to some reasonable value in case the module calls out to  	   other functions via a stub, or if a function pointer escapes  	   the module by some means.  */  	if (!me->arch.toc_section)  		me->arch.toc_section = me->arch.stubs_section; +#endif  	/* Override the stubs size */  	sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs); +  	return 0;  }  #ifdef CONFIG_MPROFILE_KERNEL  static u32 stub_insns[] = { +#ifdef CONFIG_PPC_KERNEL_PCREL +	PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)), +	PPC_RAW_NOP(), /* align the prefix insn */ +	/* paddi r12,r12,addr */ +	PPC_PREFIX_MLS | __PPC_PRFX_R(0), +	PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12), +	PPC_RAW_MTCTR(_R12), +	PPC_RAW_BCTR(), +#else  	PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),  	PPC_RAW_ADDIS(_R12, _R12, 0),  	PPC_RAW_ADDI(_R12, _R12, 0),  	PPC_RAW_MTCTR(_R12),  	PPC_RAW_BCTR(), +#endif  };  /* @@ -358,18 +503,37 @@ static inline int create_ftrace_stub(struct ppc64_stub_entry *entry,  {  	long reladdr; -	memcpy(entry->jump, stub_insns, sizeof(stub_insns)); - -	/* Stub uses address relative to kernel toc (from the paca) */ -	reladdr = addr - kernel_toc_addr(); -	if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { -		pr_err("%s: Address of %ps out of range of kernel_toc.\n", -							me->name, (void *)addr); +	if ((unsigned long)entry->jump % 8 != 0) { +		pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name);  		return 0;  	} -	entry->jump[1] |= PPC_HA(reladdr); -	entry->jump[2] |= PPC_LO(reladdr); +	BUILD_BUG_ON(sizeof(stub_insns) > sizeof(entry->jump)); +	memcpy(entry->jump, stub_insns, sizeof(stub_insns)); + +	if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { +		/* Stub uses address relative to kernel base (from the paca) */ +		reladdr = addr - local_paca->kernelbase; +		if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) { +			pr_err("%s: Address of %ps out of range of 34-bit relative address.\n", +				me->name, (void *)addr); +			return 0; +		} + +		entry->jump[2] |= IMM_H18(reladdr); +		entry->jump[3] |= IMM_L(reladdr); +	} else { +		/* Stub uses address relative to kernel toc (from the paca) */ +		reladdr = addr - kernel_toc_addr(); +		if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { +			pr_err("%s: Address of %ps out of range of kernel_toc.\n", +				me->name, (void *)addr); +			return 0; +		} + +		entry->jump[1] |= PPC_HA(reladdr); +		entry->jump[2] |= PPC_LO(reladdr); +	}  	/* Even though we don't use funcdata in the stub, it's needed elsewhere. */  	entry->funcdata = func_desc(addr); @@ -415,7 +579,11 @@ static bool is_mprofile_ftrace_call(const char *name)   */  static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me)  { +#ifndef CONFIG_PPC_KERNEL_PCREL  	return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000; +#else +	return -1; +#endif  }  /* Patch stub to reference function and correct r2 value. */ @@ -432,28 +600,53 @@ static inline int create_stub(const Elf64_Shdr *sechdrs,  	if (is_mprofile_ftrace_call(name))  		return create_ftrace_stub(entry, addr, me); +	if ((unsigned long)entry->jump % 8 != 0) { +		pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name); +		return 0; +	} + +	BUILD_BUG_ON(sizeof(ppc64_stub_insns) > sizeof(entry->jump));  	for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) {  		if (patch_instruction(&entry->jump[i],  				      ppc_inst(ppc64_stub_insns[i])))  			return 0;  	} -	/* Stub uses address relative to r2. */ -	reladdr = (unsigned long)entry - my_r2(sechdrs, me); -	if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { -		pr_err("%s: Address %p of stub out of range of %p.\n", -		       me->name, (void *)reladdr, (void *)my_r2); -		return 0; -	} -	pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); +	if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { +		/* Stub uses address relative to itself! */ +		reladdr = 0 + offsetof(struct ppc64_stub_entry, funcdata); +		BUILD_BUG_ON(reladdr != 32); +		if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) { +			pr_err("%s: Address of %p out of range of 34-bit relative address.\n", +				me->name, (void *)reladdr); +			return 0; +		} +		pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); -	if (patch_instruction(&entry->jump[0], -			      ppc_inst(entry->jump[0] | PPC_HA(reladdr)))) -		return 0; +		/* May not even need this if we're relative to 0 */ +		if (patch_instruction(&entry->jump[0], +		    ppc_inst_prefix(entry->jump[0] | IMM_H18(reladdr), +				    entry->jump[1] | IMM_L(reladdr)))) +			return 0; -	if (patch_instruction(&entry->jump[1], -			  ppc_inst(entry->jump[1] | PPC_LO(reladdr)))) -		return 0; +	} else { +		/* Stub uses address relative to r2. */ +		reladdr = (unsigned long)entry - my_r2(sechdrs, me); +		if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { +			pr_err("%s: Address %p of stub out of range of %p.\n", +			       me->name, (void *)reladdr, (void *)my_r2); +			return 0; +		} +		pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr); + +		if (patch_instruction(&entry->jump[0], +				      ppc_inst(entry->jump[0] | PPC_HA(reladdr)))) +			return 0; + +		if (patch_instruction(&entry->jump[1], +				      ppc_inst(entry->jump[1] | PPC_LO(reladdr)))) +			return 0; +	}  	// func_desc_t is 8 bytes if ABIv2, else 16 bytes  	desc = func_desc(addr); @@ -497,6 +690,37 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,  	return (unsigned long)&stubs[i];  } +#ifdef CONFIG_PPC_KERNEL_PCREL +/* Create GOT to load the location described in this ptr */ +static unsigned long got_for_addr(const Elf64_Shdr *sechdrs, +				  unsigned long addr, +				  struct module *me, +				  const char *name) +{ +	struct ppc64_got_entry *got; +	unsigned int i, num_got; + +	if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) +		return addr; + +	num_got = sechdrs[me->arch.got_section].sh_size / sizeof(*got); + +	/* Find this stub, or if that fails, the next avail. entry */ +	got = (void *)sechdrs[me->arch.got_section].sh_addr; +	for (i = 0; got[i].addr; i++) { +		if (WARN_ON(i >= num_got)) +			return 0; + +		if (got[i].addr == addr) +			return (unsigned long)&got[i]; +	} + +	got[i].addr = addr; + +	return (unsigned long)&got[i]; +} +#endif +  /* We expect a noop next: if it is, replace it with instruction to     restore r2. */  static int restore_r2(const char *name, u32 *instruction, struct module *me) @@ -504,6 +728,9 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me)  	u32 *prev_insn = instruction - 1;  	u32 insn_val = *instruction; +	if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) +		return 0; +  	if (is_mprofile_ftrace_call(name))  		return 0; @@ -549,6 +776,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  	pr_debug("Applying ADD relocate section %u to %u\n", relsec,  	       sechdrs[relsec].sh_info); +#ifndef CONFIG_PPC_KERNEL_PCREL  	/* First time we're called, we can fix up .TOC. */  	if (!me->arch.toc_fixed) {  		sym = find_dot_toc(sechdrs, strtab, symindex); @@ -558,7 +786,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  			sym->st_value = my_r2(sechdrs, me);  		me->arch.toc_fixed = true;  	} - +#endif  	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {  		/* This is where to make the change */  		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr @@ -586,6 +814,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  			*(unsigned long *)location = value;  			break; +#ifndef CONFIG_PPC_KERNEL_PCREL  		case R_PPC64_TOC:  			*(unsigned long *)location = my_r2(sechdrs, me);  			break; @@ -645,8 +874,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  				= (*((uint16_t *) location) & ~0xffff)  				| (value & 0xffff);  			break; +#endif  		case R_PPC_REL24: +#ifdef CONFIG_PPC_KERNEL_PCREL +		/* PCREL still generates REL24 for mcount */ +		case R_PPC64_REL24_NOTOC: +#endif  			/* FIXME: Handle weak symbols here --RR */  			if (sym->st_shndx == SHN_UNDEF ||  			    sym->st_shndx == SHN_LIVEPATCH) { @@ -694,6 +928,47 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  			*(u32 *)location = value;  			break; +#ifdef CONFIG_PPC_KERNEL_PCREL +		case R_PPC64_PCREL34: { +			unsigned long absvalue = value; + +			/* Convert value to relative */ +			value -= (unsigned long)location; + +			if (value + 0x200000000 > 0x3ffffffff) { +				if (sym->st_shndx != me->arch.pcpu_section) { +					pr_err("%s: REL34 %li out of range!\n", +					       me->name, (long)value); +					return -ENOEXEC; +				} + +				/* +				 * per-cpu section is special cased because +				 * it is moved during loading, so has to be +				 * converted to use GOT. +				 */ +				value = got_for_addr(sechdrs, absvalue, me, +						     strtab + sym->st_name); +				if (!value) +					return -ENOENT; +				value -= (unsigned long)location; + +				/* Turn pla into pld */ +				if (patch_instruction((u32 *)location, +				    ppc_inst_prefix((*(u32 *)location & ~0x02000000), +						    (*((u32 *)location + 1) & ~0xf8000000) | 0xe4000000))) +					return -EFAULT; +			} + +			if (patch_instruction((u32 *)location, +			    ppc_inst_prefix((*(u32 *)location & ~0x3ffff) | IMM_H18(value), +					    (*((u32 *)location + 1) & ~0xffff) | IMM_L(value)))) +				return -EFAULT; + +			break; +		} + +#else  		case R_PPC64_TOCSAVE:  			/*  			 * Marker reloc indicates we don't have to save r2. @@ -701,8 +976,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  			 * it.  			 */  			break; +#endif  		case R_PPC64_ENTRY: +			if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) +				break; +  			/*  			 * Optimize ELFv2 large code model entry point if  			 * the TOC is within 2GB range of current location. @@ -745,6 +1024,20 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,  				| (value & 0xffff);  			break; +#ifdef CONFIG_PPC_KERNEL_PCREL +		case R_PPC64_GOT_PCREL34: +			value = got_for_addr(sechdrs, value, me, +					     strtab + sym->st_name); +			if (!value) +				return -ENOENT; +			value -= (unsigned long)location; +			((uint32_t *)location)[0] = (((uint32_t *)location)[0] & ~0x3ffff) | +						    ((value >> 16) & 0x3ffff); +			((uint32_t *)location)[1] = (((uint32_t *)location)[1] & ~0xffff) | +						    (value & 0xffff); +			break; +#endif +  		default:  			pr_err("%s: Unknown ADD relocation: %lu\n",  			       me->name, diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index be8db402e963..cda4e00b67c1 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -191,7 +191,9 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)  #endif  	new_paca->lock_token = 0x8000;  	new_paca->paca_index = cpu; +#ifndef CONFIG_PPC_KERNEL_PCREL  	new_paca->kernel_toc = kernel_toc_addr(); +#endif  	new_paca->kernelbase = (unsigned long) _stext;  	/* Only set MSR:IR/DR when MMU is initialized */  	new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index d67cf79bf5d0..e88d7c9feeec 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -880,6 +880,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)  static void pcibios_fixup_resources(struct pci_dev *dev)  {  	struct pci_controller *hose = pci_bus_to_host(dev->bus); +	struct resource *res;  	int i;  	if (!hose) { @@ -891,9 +892,9 @@ static void pcibios_fixup_resources(struct pci_dev *dev)  	if (dev->is_virtfn)  		return; -	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { -		struct resource *res = dev->resource + i; +	pci_dev_for_each_resource(dev, res, i) {  		struct pci_bus_region reg; +  		if (!res->flags)  			continue; @@ -1452,11 +1453,10 @@ void pcibios_claim_one_bus(struct pci_bus *bus)  	struct pci_bus *child_bus;  	list_for_each_entry(dev, &bus->devices, bus_list) { +		struct resource *r;  		int i; -		for (i = 0; i < PCI_NUM_RESOURCES; i++) { -			struct resource *r = &dev->resource[i]; - +		pci_dev_for_each_resource(dev, r, i) {  			if (r->parent || !r->start || !r->flags)  				continue; @@ -1705,19 +1705,20 @@ EXPORT_SYMBOL_GPL(pcibios_scan_phb);  static void fixup_hide_host_resource_fsl(struct pci_dev *dev)  { -	int i, class = dev->class >> 8; +	int class = dev->class >> 8;  	/* When configured as agent, programming interface = 1 */  	int prog_if = dev->class & 0xf; +	struct resource *r;  	if ((class == PCI_CLASS_PROCESSOR_POWERPC ||  	     class == PCI_CLASS_BRIDGE_OTHER) &&  		(dev->hdr_type == PCI_HEADER_TYPE_NORMAL) &&  		(prog_if == 0) &&  		(dev->bus->parent == NULL)) { -		for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { -			dev->resource[i].start = 0; -			dev->resource[i].end = 0; -			dev->resource[i].flags = 0; +		pci_dev_for_each_resource(dev, r) { +			r->start = 0; +			r->end = 0; +			r->flags = 0;  		}  	}  } diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index fd42059ae2a5..e27342ef128b 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -73,7 +73,7 @@ static int __init pcibios_init(void)  	return 0;  } -subsys_initcall(pcibios_init); +subsys_initcall_sync(pcibios_init);  int pcibios_unmap_io_space(struct pci_bus *bus)  { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4b29ac5ddac6..1fefafb2b29b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1630,7 +1630,7 @@ void arch_setup_new_exec(void)  }  #ifdef CONFIG_PPC64 -/** +/*   * Assign a TIDR (thread ID) for task @t and set it in the thread   * structure. For now, we only support setting TIDR for 'current' task.   * @@ -1738,68 +1738,83 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)   */  int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)  { -	unsigned long clone_flags = args->flags; -	unsigned long usp = args->stack; -	unsigned long tls = args->tls; -	struct pt_regs *childregs, *kregs; +	struct pt_regs *kregs; /* Switch frame regs */  	extern void ret_from_fork(void);  	extern void ret_from_fork_scv(void); -	extern void ret_from_kernel_thread(void); +	extern void ret_from_kernel_user_thread(void); +	extern void start_kernel_thread(void);  	void (*f)(void);  	unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; -	struct thread_info *ti = task_thread_info(p);  #ifdef CONFIG_HAVE_HW_BREAKPOINT  	int i;  #endif  	klp_init_thread_info(p); -	/* Create initial stack frame. */ -	sp -= STACK_USER_INT_FRAME_SIZE; -	*(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER; - -	/* Copy registers */ -	childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); -	if (unlikely(args->fn)) { +	if (unlikely(p->flags & PF_KTHREAD)) {  		/* kernel thread */ + +		/* Create initial minimum stack frame. */ +		sp -= STACK_FRAME_MIN_SIZE;  		((unsigned long *)sp)[0] = 0; -		memset(childregs, 0, sizeof(struct pt_regs)); -		childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE; -		/* function */ -		if (args->fn) -			childregs->gpr[14] = ppc_function_entry((void *)args->fn); -#ifdef CONFIG_PPC64 -		clear_tsk_thread_flag(p, TIF_32BIT); -		childregs->softe = IRQS_ENABLED; -#endif -		childregs->gpr[15] = (unsigned long)args->fn_arg; + +		f = start_kernel_thread;  		p->thread.regs = NULL;	/* no user register state */ -		ti->flags |= _TIF_RESTOREALL; -		f = ret_from_kernel_thread; +		clear_tsk_compat_task(p);  	} else {  		/* user thread */ -		struct pt_regs *regs = current_pt_regs(); -		*childregs = *regs; -		if (usp) -			childregs->gpr[1] = usp; -		((unsigned long *)sp)[0] = childregs->gpr[1]; -		p->thread.regs = childregs; -		/* 64s sets this in ret_from_fork */ -		if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) -			childregs->gpr[3] = 0;  /* Result from fork() */ -		if (clone_flags & CLONE_SETTLS) { -			if (!is_32bit_task()) -				childregs->gpr[13] = tls; +		struct pt_regs *childregs; + +		/* Create initial user return stack frame. */ +		sp -= STACK_USER_INT_FRAME_SIZE; +		*(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER; + +		childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); + +		if (unlikely(args->fn)) { +			/* +			 * A user space thread, but it first runs a kernel +			 * thread, and then returns as though it had called +			 * execve rather than fork, so user regs will be +			 * filled in (e.g., by kernel_execve()). +			 */ +			((unsigned long *)sp)[0] = 0; +			memset(childregs, 0, sizeof(struct pt_regs)); +#ifdef CONFIG_PPC64 +			childregs->softe = IRQS_ENABLED; +#endif +			f = ret_from_kernel_user_thread; +		} else { +			struct pt_regs *regs = current_pt_regs(); +			unsigned long clone_flags = args->flags; +			unsigned long usp = args->stack; + +			/* Copy registers */ +			*childregs = *regs; +			if (usp) +				childregs->gpr[1] = usp; +			((unsigned long *)sp)[0] = childregs->gpr[1]; +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG +			WARN_ON_ONCE(childregs->softe != IRQS_ENABLED); +#endif +			if (clone_flags & CLONE_SETTLS) { +				unsigned long tls = args->tls; + +				if (!is_32bit_task()) +					childregs->gpr[13] = tls; +				else +					childregs->gpr[2] = tls; +			} + +			if (trap_is_scv(regs)) +				f = ret_from_fork_scv;  			else -				childregs->gpr[2] = tls; +				f = ret_from_fork;  		} -		if (trap_is_scv(regs)) -			f = ret_from_fork_scv; -		else -			f = ret_from_fork; +		childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); +		p->thread.regs = childregs;  	} -	childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);  	/*  	 * The way this works is that at some point in the future @@ -1813,6 +1828,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)  	sp -= STACK_SWITCH_FRAME_SIZE;  	((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE;  	kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS); +	kregs->nip = ppc_function_entry(f); +	if (unlikely(args->fn)) { +		/* +		 * Put kthread fn, arg parameters in non-volatile GPRs in the +		 * switch frame so they are loaded by _switch before it returns +		 * to ret_from_kernel_thread. +		 */ +		kregs->gpr[14] = ppc_function_entry((void *)args->fn); +		kregs->gpr[15] = (unsigned long)args->fn_arg; +	}  	p->thread.ksp = sp;  #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -1840,22 +1865,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)  		p->thread.dscr_inherit = current->thread.dscr_inherit;  		p->thread.dscr = mfspr(SPRN_DSCR);  	} -	if (cpu_has_feature(CPU_FTR_HAS_PPR)) -		childregs->ppr = DEFAULT_PPR;  	p->thread.tidr = 0;  #endif -	/* -	 * Run with the current AMR value of the kernel -	 */ -#ifdef CONFIG_PPC_PKEY -	if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) -		kregs->amr = AMR_KUAP_BLOCKED; - -	if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) -		kregs->iamr = AMR_KUEP_BLOCKED; -#endif -	kregs->nip = ppc_function_entry(f);  	return 0;  } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 2087a785f05f..5fff0d04b23f 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -290,6 +290,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,  static int ppr_get(struct task_struct *target, const struct user_regset *regset,  		   struct membuf to)  { +	if (!target->thread.regs) +		return -EINVAL; +  	return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64));  } @@ -297,6 +300,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset,  		   unsigned int pos, unsigned int count, const void *kbuf,  		   const void __user *ubuf)  { +	if (!target->thread.regs) +		return -EINVAL; +  	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,  				  &target->thread.regs->ppr, 0, sizeof(u64));  } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 31175b34856a..c087eeee320f 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -16,6 +16,7 @@  #include <linux/init.h>  #include <linux/kconfig.h>  #include <linux/kernel.h> +#include <linux/lockdep.h>  #include <linux/memblock.h>  #include <linux/of.h>  #include <linux/of_fdt.h> @@ -68,7 +69,7 @@ struct rtas_filter {   *                            functions are believed to have no users on   *                            ppc64le, and we want to keep it that way. It does   *                            not make sense for this to be set when @filter - *                            is false. + *                            is NULL.   */  struct rtas_function {  	s32 token; @@ -453,6 +454,16 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {  	},  }; +/* + * Nearly all RTAS calls need to be serialized. All uses of the + * default rtas_args block must hold rtas_lock. + * + * Exceptions to the RTAS serialization requirement (e.g. stop-self) + * must use a separate rtas_args structure. + */ +static DEFINE_RAW_SPINLOCK(rtas_lock); +static struct rtas_args rtas_args; +  /**   * rtas_function_token() - RTAS function token lookup.   * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN. @@ -560,6 +571,9 @@ static void __do_enter_rtas(struct rtas_args *args)  static void __do_enter_rtas_trace(struct rtas_args *args)  {  	const char *name = NULL; + +	if (args == &rtas_args) +		lockdep_assert_held(&rtas_lock);  	/*  	 * If the tracepoints that consume the function name aren't  	 * active, avoid the lookup. @@ -619,16 +633,6 @@ static void do_enter_rtas(struct rtas_args *args)  struct rtas_t rtas; -/* - * Nearly all RTAS calls need to be serialized. All uses of the - * default rtas_args block must hold rtas_lock. - * - * Exceptions to the RTAS serialization requirement (e.g. stop-self) - * must use a separate rtas_args structure. - */ -static DEFINE_RAW_SPINLOCK(rtas_lock); -static struct rtas_args rtas_args; -  DEFINE_SPINLOCK(rtas_data_buf_lock);  EXPORT_SYMBOL_GPL(rtas_data_buf_lock); @@ -951,6 +955,8 @@ static char *__fetch_rtas_last_error(char *altbuf)  	u32 bufsz;  	char *buf = NULL; +	lockdep_assert_held(&rtas_lock); +  	if (token == -1)  		return NULL; @@ -981,7 +987,7 @@ static char *__fetch_rtas_last_error(char *altbuf)  				buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC);  		}  		if (buf) -			memcpy(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX); +			memmove(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);  	}  	return buf; @@ -1016,6 +1022,23 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,  	do_enter_rtas(args);  } +/** + * rtas_call_unlocked() - Invoke an RTAS firmware function without synchronization. + * @args: RTAS parameter block to be used for the call, must obey RTAS addressing + *        constraints. + * @token: Identifies the function being invoked. + * @nargs: Number of input parameters. Does not include token. + * @nret: Number of output parameters, including the call status. + * @....: List of @nargs input parameters. + * + * Invokes the RTAS function indicated by @token, which the caller + * should obtain via rtas_function_token(). + * + * This function is similar to rtas_call(), but must be used with a + * limited set of RTAS calls specifically exempted from the general + * requirement that only one RTAS call may be in progress at any + * time. Examples include stop-self and ibm,nmi-interlock. + */  void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...)  {  	va_list list; @@ -1091,6 +1114,7 @@ static bool token_is_restricted_errinjct(s32 token)   */  int rtas_call(int token, int nargs, int nret, int *outputs, ...)  { +	struct pin_cookie cookie;  	va_list list;  	int i;  	unsigned long flags; @@ -1117,6 +1141,8 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)  	}  	raw_spin_lock_irqsave(&rtas_lock, flags); +	cookie = lockdep_pin_lock(&rtas_lock); +  	/* We use the global rtas args buffer */  	args = &rtas_args; @@ -1134,6 +1160,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)  			outputs[i] = be32_to_cpu(args->rets[i + 1]);  	ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0; +	lockdep_unpin_lock(&rtas_lock, cookie);  	raw_spin_unlock_irqrestore(&rtas_lock, flags);  	if (buff_copy) { @@ -1765,6 +1792,7 @@ err:  /* We assume to be passed big endian arguments */  SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)  { +	struct pin_cookie cookie;  	struct rtas_args args;  	unsigned long flags;  	char *buff_copy, *errbuf = NULL; @@ -1833,6 +1861,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)  	buff_copy = get_errorlog_buffer();  	raw_spin_lock_irqsave(&rtas_lock, flags); +	cookie = lockdep_pin_lock(&rtas_lock);  	rtas_args = args;  	do_enter_rtas(&rtas_args); @@ -1843,6 +1872,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)  	if (be32_to_cpu(args.rets[0]) == -1)  		errbuf = __fetch_rtas_last_error(buff_copy); +	lockdep_unpin_lock(&rtas_lock, cookie);  	raw_spin_unlock_irqrestore(&rtas_lock, flags);  	if (buff_copy) { diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index e77734e5a127..d2a446216444 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -630,13 +630,14 @@ static __init void probe_machine(void)  	for (machine_id = &__machine_desc_start;  	     machine_id < &__machine_desc_end;  	     machine_id++) { -		DBG("  %s ...", machine_id->name); +		DBG("  %s ...\n", machine_id->name); +		if (machine_id->compatible && !of_machine_is_compatible(machine_id->compatible)) +			continue;  		memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls)); -		if (ppc_md.probe()) { -			DBG(" match !\n"); -			break; -		} -		DBG("\n"); +		if (ppc_md.probe && !ppc_md.probe()) +			continue; +		DBG("   %s match !\n", machine_id->name); +		break;  	}  	/* What can we do if we didn't find ? */  	if (machine_id >= &__machine_desc_end) { diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index b2e0d3ce4261..246201d0d879 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -480,7 +480,7 @@ void early_setup_secondary(void)  #endif /* CONFIG_SMP */ -void panic_smp_self_stop(void) +void __noreturn panic_smp_self_stop(void)  {  	hard_irq_disable();  	spin_begin(); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6b90f10a6c81..265801a3e94c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -61,6 +61,8 @@  #include <asm/kup.h>  #include <asm/fadump.h> +#include <trace/events/ipi.h> +  #ifdef DEBUG  #include <asm/udbg.h>  #define DBG(fmt...) udbg_printf(fmt) @@ -364,12 +366,12 @@ static inline void do_message_pass(int cpu, int msg)  #endif  } -void smp_send_reschedule(int cpu) +void arch_smp_send_reschedule(int cpu)  {  	if (likely(smp_ops))  		do_message_pass(cpu, PPC_MSG_RESCHEDULE);  } -EXPORT_SYMBOL_GPL(smp_send_reschedule); +EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);  void arch_send_call_function_single_ipi(int cpu)  { @@ -1611,7 +1613,7 @@ void start_secondary(void *unused)  	if (IS_ENABLED(CONFIG_PPC32))  		setup_kup(); -	mmgrab(&init_mm); +	mmgrab_lazy_tlb(&init_mm);  	current->active_mm = &init_mm;  	smp_store_cpu_info(cpu); @@ -1752,7 +1754,7 @@ void __cpu_die(unsigned int cpu)  		smp_ops->cpu_die(cpu);  } -void arch_cpu_idle_dead(void) +void __noreturn arch_cpu_idle_dead(void)  {  	/*  	 * Disable on the down path. This will be re-enabled by diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ef9a61718940..0f39a6b84132 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -217,13 +217,18 @@ static DEVICE_ATTR(dscr_default, 0600,  static void __init sysfs_create_dscr_default(void)  {  	if (cpu_has_feature(CPU_FTR_DSCR)) { +		struct device *dev_root;  		int cpu;  		dscr_default = spr_default_dscr;  		for_each_possible_cpu(cpu)  			paca_ptrs[cpu]->dscr_default = dscr_default; -		device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); +		dev_root = bus_get_dev_root(&cpu_subsys); +		if (dev_root) { +			device_create_file(dev_root, &dev_attr_dscr_default); +			put_device(dev_root); +		}  	}  }  #endif /* CONFIG_PPC64 */ @@ -746,7 +751,12 @@ static DEVICE_ATTR(svm, 0444, show_svm, NULL);  static void __init create_svm_file(void)  { -	device_create_file(cpu_subsys.dev_root, &dev_attr_svm); +	struct device *dev_root = bus_get_dev_root(&cpu_subsys); + +	if (dev_root) { +		device_create_file(dev_root, &dev_attr_svm); +		put_device(dev_root); +	}  }  #else  static void __init create_svm_file(void) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 9d8665910350..df20cf201f74 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -887,7 +887,11 @@ void __init time_init(void)  	unsigned shift;  	/* Normal PowerPC with timebase register */ -	ppc_md.calibrate_decr(); +	if (ppc_md.calibrate_decr) +		ppc_md.calibrate_decr(); +	else +		generic_calibrate_decr(); +  	printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",  	       ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);  	printk(KERN_DEBUG "time_init: processor frequency   = %lu.%.6lu MHz\n", diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 7b85c3b460a3..a47f30373423 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -194,6 +194,8 @@ __ftrace_make_nop(struct module *mod,  	 * get corrupted.  	 *  	 * Use a b +8 to jump over the load. +	 * XXX: could make PCREL depend on MPROFILE_KERNEL +	 * XXX: check PCREL && MPROFILE_KERNEL calling sequence  	 */  	if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32))  		pop = ppc_inst(PPC_RAW_NOP()); @@ -725,6 +727,15 @@ int __init ftrace_dyn_arch_init(void)  {  	int i;  	unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init }; +#ifdef CONFIG_PPC_KERNEL_PCREL +	u32 stub_insns[] = { +		/* pla r12,addr */ +		PPC_PREFIX_MLS | __PPC_PRFX_R(1), +		PPC_INST_PADDI | ___PPC_RT(_R12), +		PPC_RAW_MTCTR(_R12), +		PPC_RAW_BCTR() +	}; +#else  	u32 stub_insns[] = {  		PPC_RAW_LD(_R12, _R13, PACATOC),  		PPC_RAW_ADDIS(_R12, _R12, 0), @@ -732,6 +743,8 @@ int __init ftrace_dyn_arch_init(void)  		PPC_RAW_MTCTR(_R12),  		PPC_RAW_BCTR()  	}; +#endif +  	unsigned long addr;  	long reladdr; @@ -740,19 +753,36 @@ int __init ftrace_dyn_arch_init(void)  	else  		addr = ppc_global_function_entry((void *)ftrace_caller); -	reladdr = addr - kernel_toc_addr(); +	if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { +		for (i = 0; i < 2; i++) { +			reladdr = addr - (unsigned long)tramp[i]; -	if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) { -		pr_err("Address of %ps out of range of kernel_toc.\n", +			if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) { +				pr_err("Address of %ps out of range of pcrel address.\n", +					(void *)addr); +				return -1; +			} + +			memcpy(tramp[i], stub_insns, sizeof(stub_insns)); +			tramp[i][0] |= IMM_H18(reladdr); +			tramp[i][1] |= IMM_L(reladdr); +			add_ftrace_tramp((unsigned long)tramp[i]); +		} +	} else { +		reladdr = addr - kernel_toc_addr(); + +		if (reladdr >= (long)SZ_2G || reladdr < -(long)SZ_2G) { +			pr_err("Address of %ps out of range of kernel_toc.\n",  				(void *)addr); -		return -1; -	} +			return -1; +		} -	for (i = 0; i < 2; i++) { -		memcpy(tramp[i], stub_insns, sizeof(stub_insns)); -		tramp[i][1] |= PPC_HA(reladdr); -		tramp[i][2] |= PPC_LO(reladdr); -		add_ftrace_tramp((unsigned long)tramp[i]); +		for (i = 0; i < 2; i++) { +			memcpy(tramp[i], stub_insns, sizeof(stub_insns)); +			tramp[i][1] |= PPC_HA(reladdr); +			tramp[i][2] |= PPC_LO(reladdr); +			add_ftrace_tramp((unsigned long)tramp[i]); +		}  	}  	return 0; diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 66f723f53be2..4c3f34485f08 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -2,7 +2,7 @@  # List of files in the vdso, has to be asm only for now -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 +# Include the generic Makefile to check the built vdso.  include $(srctree)/lib/vdso/Makefile  obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index 0c4ecc8fec5a..48fc6658053a 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -38,7 +38,11 @@  	.else  	addi		r4, r5, VDSO_DATA_OFFSET  	.endif -	bl		DOTSYM(\funct) +#ifdef __powerpc64__ +	bl		CFUNC(DOTSYM(\funct)) +#else +	bl		\funct +#endif  	PPC_LL		r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)  #ifdef __powerpc64__  	PPC_LL		r2, PPC_MIN_STKFRM + STK_GOT(r1) diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index ffe5d90abe17..fcc0ad6d9c7b 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -177,10 +177,16 @@ fpone:  fphalf:  	.quad	0x3fe0000000000000	/* 0.5 */ +#ifdef CONFIG_PPC_KERNEL_PCREL +#define LDCONST(fr, name)		\ +	pla	r11,name@pcrel;		\ +	lfd	fr,0(r11) +#else  #define LDCONST(fr, name)		\  	addis	r11,r2,name@toc@ha;	\  	lfd	fr,name@toc@l(r11)  #endif +#endif  	.text  /*   * Internal routine to enable floating point and set FPSCR to 0. diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index ee86753e444e..13614f0b269c 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -169,12 +169,18 @@ SECTIONS  	}  #else /* CONFIG_PPC32 */ +#ifndef CONFIG_PPC_KERNEL_PCREL  	.toc1 : AT(ADDR(.toc1) - LOAD_OFFSET) {  		*(.toc1)  	} +#endif  	.got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) { +#ifdef CONFIG_PPC_KERNEL_PCREL +		*(.got) +#else  		*(.got .toc) +#endif  	}  	SOFT_MASK_TABLE(8)  |