diff options
Diffstat (limited to 'arch/powerpc')
47 files changed, 984 insertions, 157 deletions
| diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c51e6ce42e7a..2ed525a44734 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -166,6 +166,7 @@ config PPC  	select GENERIC_CLOCKEVENTS_BROADCAST	if SMP  	select GENERIC_CMOS_UPDATE  	select GENERIC_CPU_AUTOPROBE +	select GENERIC_CPU_VULNERABILITIES	if PPC_BOOK3S_64  	select GENERIC_IRQ_SHOW  	select GENERIC_IRQ_SHOW_LEVEL  	select GENERIC_SMP_IDLE_THREAD diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 9a677cd5997f..44697817ccc6 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1005,7 +1005,6 @@ static inline int pmd_protnone(pmd_t pmd)  }  #endif /* CONFIG_NUMA_BALANCING */ -#define __HAVE_ARCH_PMD_WRITE  #define pmd_write(pmd)		pte_write(pmd_pte(pmd))  #define __pmd_write(pmd)	__pte_write(pmd_pte(pmd))  #define pmd_savedwrite(pmd)	pte_savedwrite(pmd_pte(pmd)) diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index a703452d67b6..555e22d5e07f 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -209,5 +209,11 @@ exc_##label##_book3e:  	ori	r3,r3,vector_offset@l;		\  	mtspr	SPRN_IVOR##vector_number,r3; +#define RFI_TO_KERNEL							\ +	rfi + +#define RFI_TO_USER							\ +	rfi +  #endif /* _ASM_POWERPC_EXCEPTION_64E_H */ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index b27205297e1d..7197b179c1b1 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -74,6 +74,59 @@   */  #define EX_R3		EX_DAR +/* + * Macros for annotating the expected destination of (h)rfid + * + * The nop instructions allow us to insert one or more instructions to flush the + * L1-D cache when returning to userspace or a guest. 
+ */ +#define RFI_FLUSH_SLOT							\ +	RFI_FLUSH_FIXUP_SECTION;					\ +	nop;								\ +	nop;								\ +	nop + +#define RFI_TO_KERNEL							\ +	rfid + +#define RFI_TO_USER							\ +	RFI_FLUSH_SLOT;							\ +	rfid;								\ +	b	rfi_flush_fallback + +#define RFI_TO_USER_OR_KERNEL						\ +	RFI_FLUSH_SLOT;							\ +	rfid;								\ +	b	rfi_flush_fallback + +#define RFI_TO_GUEST							\ +	RFI_FLUSH_SLOT;							\ +	rfid;								\ +	b	rfi_flush_fallback + +#define HRFI_TO_KERNEL							\ +	hrfid + +#define HRFI_TO_USER							\ +	RFI_FLUSH_SLOT;							\ +	hrfid;								\ +	b	hrfi_flush_fallback + +#define HRFI_TO_USER_OR_KERNEL						\ +	RFI_FLUSH_SLOT;							\ +	hrfid;								\ +	b	hrfi_flush_fallback + +#define HRFI_TO_GUEST							\ +	RFI_FLUSH_SLOT;							\ +	hrfid;								\ +	b	hrfi_flush_fallback + +#define HRFI_TO_UNKNOWN							\ +	RFI_FLUSH_SLOT;							\ +	hrfid;								\ +	b	hrfi_flush_fallback +  #ifdef CONFIG_RELOCATABLE  #define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)			\  	mfspr	r11,SPRN_##h##SRR0;	/* save SRR0 */			\ @@ -218,7 +271,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)  	mtspr	SPRN_##h##SRR0,r12;					\  	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\  	mtspr	SPRN_##h##SRR1,r10;					\ -	h##rfid;							\ +	h##RFI_TO_KERNEL;						\  	b	.	/* prevent speculative execution */  #define EXCEPTION_PROLOG_PSERIES_1(label, h)				\  	__EXCEPTION_PROLOG_PSERIES_1(label, h) @@ -232,7 +285,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)  	mtspr	SPRN_##h##SRR0,r12;					\  	mfspr	r12,SPRN_##h##SRR1;	/* and SRR1 */			\  	mtspr	SPRN_##h##SRR1,r10;					\ -	h##rfid;							\ +	h##RFI_TO_KERNEL;						\  	b	.	
/* prevent speculative execution */  #define EXCEPTION_PROLOG_PSERIES_1_NORI(label, h)			\ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 8f88f771cc55..1e82eb3caabd 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -187,7 +187,20 @@ label##3:					       	\  	FTR_ENTRY_OFFSET label##1b-label##3b;		\  	.popsection; +#define RFI_FLUSH_FIXUP_SECTION				\ +951:							\ +	.pushsection __rfi_flush_fixup,"a";		\ +	.align 2;					\ +952:							\ +	FTR_ENTRY_OFFSET 951b-952b;			\ +	.popsection; + +  #ifndef __ASSEMBLY__ +#include <linux/types.h> + +extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; +  void apply_feature_fixups(void);  void setup_feature_keys(void);  #endif diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index a409177be8bd..eca3f9c68907 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -241,6 +241,7 @@  #define H_GET_HCA_INFO          0x1B8  #define H_GET_PERF_COUNT        0x1BC  #define H_MANAGE_TRACE          0x1C0 +#define H_GET_CPU_CHARACTERISTICS 0x1C8  #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4  #define H_QUERY_INT_STATE       0x1E4  #define H_POLL_PENDING		0x1D8 @@ -330,6 +331,17 @@  #define H_SIGNAL_SYS_RESET_ALL_OTHERS		-2  /* >= 0 values are CPU number */ +/* H_GET_CPU_CHARACTERISTICS return values */ +#define H_CPU_CHAR_SPEC_BAR_ORI31	(1ull << 63) // IBM bit 0 +#define H_CPU_CHAR_BCCTRL_SERIALISED	(1ull << 62) // IBM bit 1 +#define H_CPU_CHAR_L1D_FLUSH_ORI30	(1ull << 61) // IBM bit 2 +#define H_CPU_CHAR_L1D_FLUSH_TRIG2	(1ull << 60) // IBM bit 3 +#define H_CPU_CHAR_L1D_THREAD_PRIV	(1ull << 59) // IBM bit 4 + +#define H_CPU_BEHAV_FAVOUR_SECURITY	(1ull << 63) // IBM bit 0 +#define H_CPU_BEHAV_L1D_FLUSH_PR	(1ull << 62) // IBM bit 1 +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ull << 61) // IBM bit 2 +  /* Flag values used in H_REGISTER_PROC_TBL hcall */  
#define PROC_TABLE_OP_MASK	0x18  #define PROC_TABLE_DEREG	0x10 @@ -341,6 +353,7 @@  #define PROC_TABLE_GTSE		0x01  #ifndef __ASSEMBLY__ +#include <linux/types.h>  /**   * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments @@ -436,6 +449,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)  	}  } +struct h_cpu_char_result { +	u64 character; +	u64 behaviour; +}; +  #endif /* __ASSEMBLY__ */  #endif /* __KERNEL__ */  #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 96753f3aac6d..941c2a3f231b 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -180,6 +180,7 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,  		struct iommu_group *grp);  extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);  extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm); +extern void kvmppc_setup_partition_table(struct kvm *kvm);  extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,  				struct kvm_create_spapr_tce_64 *args); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 73b92017b6d7..cd2fc1cc1cc7 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -76,6 +76,7 @@ struct machdep_calls {  	void __noreturn	(*restart)(char *cmd);  	void __noreturn (*halt)(void); +	void		(*panic)(char *str);  	void		(*cpu_die)(void);  	long		(*time_init)(void); /* Optional, may be NULL */ diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 6177d43f0ce8..e2a2b8400490 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -160,9 +160,10 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,  #endif  } -static inline void arch_dup_mmap(struct mm_struct *oldmm, -				 struct mm_struct *mm) +static inline int arch_dup_mmap(struct mm_struct *oldmm, +				struct 
mm_struct *mm)  { +	return 0;  }  #ifndef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 3892db93b837..23ac7fc0af23 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -232,6 +232,16 @@ struct paca_struct {  	struct sibling_subcore_state *sibling_subcore_state;  #endif  #endif +#ifdef CONFIG_PPC_BOOK3S_64 +	/* +	 * rfi fallback flush must be in its own cacheline to prevent +	 * other paca data leaking into the L1d +	 */ +	u64 exrfi[EX_SIZE] __aligned(0x80); +	void *rfi_flush_fallback_area; +	u64 l1d_flush_congruence; +	u64 l1d_flush_sets; +#endif  };  extern void copy_mm_to_paca(struct mm_struct *mm); diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 7f01b22fa6cb..55eddf50d149 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu)  	return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu);  } +static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) +{ +	unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; +	long rc; + +	rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf); +	if (rc == H_SUCCESS) { +		p->character = retbuf[0]; +		p->behaviour = retbuf[1]; +	} + +	return rc; +} +  #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 257d23dbf55d..469b7fdc9be4 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -24,6 +24,7 @@ extern void reloc_got2(unsigned long);  void check_for_initrd(void);  void initmem_init(void); +void setup_panic(void);  #define ARCH_PANIC_TIMEOUT 180  #ifdef CONFIG_PPC_PSERIES @@ -38,6 +39,19 @@ static inline void pseries_big_endian_exceptions(void) {}  static inline void pseries_little_endian_exceptions(void) {}  #endif /* CONFIG_PPC_PSERIES */ +void 
rfi_flush_enable(bool enable); + +/* These are bit flags */ +enum l1d_flush_type { +	L1D_FLUSH_NONE		= 0x1, +	L1D_FLUSH_FALLBACK	= 0x2, +	L1D_FLUSH_ORI		= 0x4, +	L1D_FLUSH_MTTRIG	= 0x8, +}; + +void __init setup_rfi_flush(enum l1d_flush_type, bool enable); +void do_rfi_flush_fixups(enum l1d_flush_type types); +  #endif /* !__ASSEMBLY__ */  #endif	/* _ASM_POWERPC_SETUP_H */ diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index 0d960ef78a9a..1a6ed5919ffd 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -1,6 +1,7 @@  # UAPI Header export list  include include/uapi/asm-generic/Kbuild.asm +generic-y += bpf_perf_event.h  generic-y += param.h  generic-y += poll.h  generic-y += resource.h diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 61d6049f4c1e..637b7263cb86 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -443,6 +443,31 @@ struct kvm_ppc_rmmu_info {  	__u32	ap_encodings[8];  }; +/* For KVM_PPC_GET_CPU_CHAR */ +struct kvm_ppc_cpu_char { +	__u64	character;		/* characteristics of the CPU */ +	__u64	behaviour;		/* recommended software behaviour */ +	__u64	character_mask;		/* valid bits in character */ +	__u64	behaviour_mask;		/* valid bits in behaviour */ +}; + +/* + * Values for character and character_mask. + * These are identical to the values used by H_GET_CPU_CHARACTERISTICS. 
+ */ +#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31		(1ULL << 63) +#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED	(1ULL << 62) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30	(1ULL << 61) +#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2	(1ULL << 60) +#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV	(1ULL << 59) +#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED	(1ULL << 58) +#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF	(1ULL << 57) +#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS	(1ULL << 56) + +#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY	(1ULL << 63) +#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR		(1ULL << 62) +#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR	(1ULL << 61) +  /* Per-vcpu XICS interrupt controller state */  #define KVM_REG_PPC_ICP_STATE	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6b958414b4e0..f390d57cf2e1 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -237,6 +237,11 @@ int main(void)  	OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);  	OFFSET(PACA_IN_MCE, paca_struct, in_mce);  	OFFSET(PACA_IN_NMI, paca_struct, in_nmi); +	OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area); +	OFFSET(PACA_EXRFI, paca_struct, exrfi); +	OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence); +	OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets); +  #endif  	OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);  	OFFSET(PACAKEXECSTATE, paca_struct, kexec_state); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 610955fe8b81..679bbe714e85 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -102,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)  	li	r0,0  	mtspr	SPRN_PSSCR,r0  	mtspr	SPRN_LPID,r0 +	mtspr	SPRN_PID,r0  	mfspr	r3,SPRN_LPCR  	LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE  | LPCR_HEIC)  	or	r3, r3, r4 @@ -126,6 +127,7 @@ _GLOBAL(__restore_cpu_power9)  	
li	r0,0  	mtspr	SPRN_PSSCR,r0  	mtspr	SPRN_LPID,r0 +	mtspr	SPRN_PID,r0  	mfspr   r3,SPRN_LPCR  	LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)  	or	r3, r3, r4 diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3320bcac7192..2748584b767d 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -37,6 +37,11 @@  #include <asm/tm.h>  #include <asm/ppc-opcode.h>  #include <asm/export.h> +#ifdef CONFIG_PPC_BOOK3S +#include <asm/exception-64s.h> +#else +#include <asm/exception-64e.h> +#endif  /*   * System calls. @@ -262,13 +267,23 @@ BEGIN_FTR_SECTION  END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	ld	r13,GPR13(r1)	/* only restore r13 if returning to usermode */ +	ld	r2,GPR2(r1) +	ld	r1,GPR1(r1) +	mtlr	r4 +	mtcr	r5 +	mtspr	SPRN_SRR0,r7 +	mtspr	SPRN_SRR1,r8 +	RFI_TO_USER +	b	.	/* prevent speculative execution */ + +	/* exit to kernel */  1:	ld	r2,GPR2(r1)  	ld	r1,GPR1(r1)  	mtlr	r4  	mtcr	r5  	mtspr	SPRN_SRR0,r7  	mtspr	SPRN_SRR1,r8 -	RFI +	RFI_TO_KERNEL  	b	.	/* prevent speculative execution */  .Lsyscall_error: @@ -397,8 +412,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	mtmsrd	r10, 1  	mtspr	SPRN_SRR0, r11  	mtspr	SPRN_SRR1, r12 - -	rfid +	RFI_TO_USER  	b	.	/* prevent speculative execution */  #endif  _ASM_NOKPROBE_SYMBOL(system_call_common); @@ -878,7 +892,7 @@ BEGIN_FTR_SECTION  END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	ACCOUNT_CPU_USER_EXIT(r13, r2, r4)  	REST_GPR(13, r1) -1: +  	mtspr	SPRN_SRR1,r3  	ld	r2,_CCR(r1) @@ -891,8 +905,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)  	ld	r3,GPR3(r1)  	ld	r4,GPR4(r1)  	ld	r1,GPR1(r1) +	RFI_TO_USER +	b	.	/* prevent speculative execution */ -	rfid +1:	mtspr	SPRN_SRR1,r3 + +	ld	r2,_CCR(r1) +	mtcrf	0xFF,r2 +	ld	r2,_NIP(r1) +	mtspr	SPRN_SRR0,r2 + +	ld	r0,GPR0(r1) +	ld	r2,GPR2(r1) +	ld	r3,GPR3(r1) +	ld	r4,GPR4(r1) +	ld	r1,GPR1(r1) +	RFI_TO_KERNEL  	b	.	
/* prevent speculative execution */  #endif /* CONFIG_PPC_BOOK3E */ @@ -1073,7 +1101,7 @@ __enter_rtas:  	mtspr	SPRN_SRR0,r5  	mtspr	SPRN_SRR1,r6 -	rfid +	RFI_TO_KERNEL  	b	.	/* prevent speculative execution */  rtas_return_loc: @@ -1098,7 +1126,7 @@ rtas_return_loc:  	mtspr	SPRN_SRR0,r3  	mtspr	SPRN_SRR1,r4 -	rfid +	RFI_TO_KERNEL  	b	.	/* prevent speculative execution */  _ASM_NOKPROBE_SYMBOL(__enter_rtas)  _ASM_NOKPROBE_SYMBOL(rtas_return_loc) @@ -1171,7 +1199,7 @@ _GLOBAL(enter_prom)  	LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)  	andc	r11,r11,r12  	mtsrr1	r11 -	rfid +	RFI_TO_KERNEL  #endif /* CONFIG_PPC_BOOK3E */  1:	/* Return from OF */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e441b469dc8f..2dc10bf646b8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -256,7 +256,7 @@ BEGIN_FTR_SECTION  	LOAD_HANDLER(r12, machine_check_handle_early)  1:	mtspr	SPRN_SRR0,r12  	mtspr	SPRN_SRR1,r11 -	rfid +	RFI_TO_KERNEL  	b	.	/* prevent speculative execution */  2:  	/* Stack overflow. Stay on emergency stack and panic. @@ -445,7 +445,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)  	li	r3,MSR_ME  	andc	r10,r10,r3		/* Turn off MSR_ME */  	mtspr	SPRN_SRR1,r10 -	rfid +	RFI_TO_KERNEL  	b	.  2:  	/* @@ -463,7 +463,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)  	 */  	bl	machine_check_queue_event  	MACHINE_CHECK_HANDLER_WINDUP -	rfid +	RFI_TO_USER_OR_KERNEL  9:  	/* Deliver the machine check to host kernel in V mode. */  	MACHINE_CHECK_HANDLER_WINDUP @@ -598,6 +598,9 @@ EXC_COMMON_BEGIN(slb_miss_common)  	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */  	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */ +	andi.	r9,r11,MSR_PR	// Check for exception from userspace +	cmpdi	cr4,r9,MSR_PR	// And save the result in CR4 for later +  	/*  	 * Test MSR_RI before calling slb_allocate_realmode, because the  	 * MSR in r11 gets clobbered. 
However we still want to allocate @@ -624,9 +627,12 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)  	/* All done -- return from exception. */ +	bne	cr4,1f		/* returning to kernel */ +  .machine	push  .machine	"power4"  	mtcrf	0x80,r9 +	mtcrf	0x08,r9		/* MSR[PR] indication is in cr4 */  	mtcrf	0x04,r9		/* MSR[RI] indication is in cr5 */  	mtcrf	0x02,r9		/* I/D indication is in cr6 */  	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */ @@ -640,9 +646,30 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)  	ld	r11,PACA_EXSLB+EX_R11(r13)  	ld	r12,PACA_EXSLB+EX_R12(r13)  	ld	r13,PACA_EXSLB+EX_R13(r13) -	rfid +	RFI_TO_USER +	b	.	/* prevent speculative execution */ +1: +.machine	push +.machine	"power4" +	mtcrf	0x80,r9 +	mtcrf	0x08,r9		/* MSR[PR] indication is in cr4 */ +	mtcrf	0x04,r9		/* MSR[RI] indication is in cr5 */ +	mtcrf	0x02,r9		/* I/D indication is in cr6 */ +	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */ +.machine	pop + +	RESTORE_CTR(r9, PACA_EXSLB) +	RESTORE_PPR_PACA(PACA_EXSLB, r9) +	mr	r3,r12 +	ld	r9,PACA_EXSLB+EX_R9(r13) +	ld	r10,PACA_EXSLB+EX_R10(r13) +	ld	r11,PACA_EXSLB+EX_R11(r13) +	ld	r12,PACA_EXSLB+EX_R12(r13) +	ld	r13,PACA_EXSLB+EX_R13(r13) +	RFI_TO_KERNEL  	b	.	/* prevent speculative execution */ +  2:	std     r3,PACA_EXSLB+EX_DAR(r13)  	mr	r3,r12  	mfspr	r11,SPRN_SRR0 @@ -651,7 +678,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)  	mtspr	SPRN_SRR0,r10  	ld	r10,PACAKMSR(r13)  	mtspr	SPRN_SRR1,r10 -	rfid +	RFI_TO_KERNEL  	b	.  8:	std     r3,PACA_EXSLB+EX_DAR(r13) @@ -662,7 +689,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)  	mtspr	SPRN_SRR0,r10  	ld	r10,PACAKMSR(r13)  	mtspr	SPRN_SRR1,r10 -	rfid +	RFI_TO_KERNEL  	b	.  EXC_COMMON_BEGIN(unrecov_slb) @@ -901,7 +928,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)  	mtspr	SPRN_SRR0,r10 ; 				\  	ld	r10,PACAKMSR(r13) ;				\  	mtspr	SPRN_SRR1,r10 ; 				\ -	rfid ; 							\ +	RFI_TO_KERNEL ;						\  	b	. 
;	/* prevent speculative execution */  #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH @@ -917,7 +944,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)				\  	xori	r12,r12,MSR_LE ;				\  	mtspr	SPRN_SRR1,r12 ;					\  	mr	r13,r9 ;					\ -	rfid ;		/* return to userspace */		\ +	RFI_TO_USER ;	/* return to userspace */		\  	b	. ;	/* prevent speculative execution */  #else  #define SYSCALL_FASTENDIAN_TEST @@ -1063,7 +1090,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)  	mtcr	r11  	REST_GPR(11, r1)  	ld	r1,GPR1(r1) -	hrfid +	HRFI_TO_USER_OR_KERNEL  1:	mtcr	r11  	REST_GPR(11, r1) @@ -1314,7 +1341,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)  	ld	r11,PACA_EXGEN+EX_R11(r13)  	ld	r12,PACA_EXGEN+EX_R12(r13)  	ld	r13,PACA_EXGEN+EX_R13(r13) -	HRFID +	HRFI_TO_UNKNOWN  	b	.  #endif @@ -1418,10 +1445,94 @@ masked_##_H##interrupt:					\  	ld	r10,PACA_EXGEN+EX_R10(r13);		\  	ld	r11,PACA_EXGEN+EX_R11(r13);		\  	/* returns to kernel where r13 must be set up, so don't restore it */ \ -	##_H##rfid;					\ +	##_H##RFI_TO_KERNEL;				\  	b	.;					\  	MASKED_DEC_HANDLER(_H) +TRAMP_REAL_BEGIN(rfi_flush_fallback) +	SET_SCRATCH0(r13); +	GET_PACA(r13); +	std	r9,PACA_EXRFI+EX_R9(r13) +	std	r10,PACA_EXRFI+EX_R10(r13) +	std	r11,PACA_EXRFI+EX_R11(r13) +	std	r12,PACA_EXRFI+EX_R12(r13) +	std	r8,PACA_EXRFI+EX_R13(r13) +	mfctr	r9 +	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) +	ld	r11,PACA_L1D_FLUSH_SETS(r13) +	ld	r12,PACA_L1D_FLUSH_CONGRUENCE(r13) +	/* +	 * The load addresses are at staggered offsets within cachelines, +	 * which suits some pipelines better (on others it should not +	 * hurt). 
+	 */ +	addi	r12,r12,8 +	mtctr	r11 +	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + +	/* order ld/st prior to dcbt stop all streams with flushing */ +	sync +1:	li	r8,0 +	.rept	8 /* 8-way set associative */ +	ldx	r11,r10,r8 +	add	r8,r8,r12 +	xor	r11,r11,r11	// Ensure r11 is 0 even if fallback area is not +	add	r8,r8,r11	// Add 0, this creates a dependency on the ldx +	.endr +	addi	r10,r10,128 /* 128 byte cache line */ +	bdnz	1b + +	mtctr	r9 +	ld	r9,PACA_EXRFI+EX_R9(r13) +	ld	r10,PACA_EXRFI+EX_R10(r13) +	ld	r11,PACA_EXRFI+EX_R11(r13) +	ld	r12,PACA_EXRFI+EX_R12(r13) +	ld	r8,PACA_EXRFI+EX_R13(r13) +	GET_SCRATCH0(r13); +	rfid + +TRAMP_REAL_BEGIN(hrfi_flush_fallback) +	SET_SCRATCH0(r13); +	GET_PACA(r13); +	std	r9,PACA_EXRFI+EX_R9(r13) +	std	r10,PACA_EXRFI+EX_R10(r13) +	std	r11,PACA_EXRFI+EX_R11(r13) +	std	r12,PACA_EXRFI+EX_R12(r13) +	std	r8,PACA_EXRFI+EX_R13(r13) +	mfctr	r9 +	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) +	ld	r11,PACA_L1D_FLUSH_SETS(r13) +	ld	r12,PACA_L1D_FLUSH_CONGRUENCE(r13) +	/* +	 * The load addresses are at staggered offsets within cachelines, +	 * which suits some pipelines better (on others it should not +	 * hurt). 
+	 */ +	addi	r12,r12,8 +	mtctr	r11 +	DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ + +	/* order ld/st prior to dcbt stop all streams with flushing */ +	sync +1:	li	r8,0 +	.rept	8 /* 8-way set associative */ +	ldx	r11,r10,r8 +	add	r8,r8,r12 +	xor	r11,r11,r11	// Ensure r11 is 0 even if fallback area is not +	add	r8,r8,r11	// Add 0, this creates a dependency on the ldx +	.endr +	addi	r10,r10,128 /* 128 byte cache line */ +	bdnz	1b + +	mtctr	r9 +	ld	r9,PACA_EXRFI+EX_R9(r13) +	ld	r10,PACA_EXRFI+EX_R10(r13) +	ld	r11,PACA_EXRFI+EX_R11(r13) +	ld	r12,PACA_EXRFI+EX_R12(r13) +	ld	r8,PACA_EXRFI+EX_R13(r13) +	GET_SCRATCH0(r13); +	hrfid +  /*   * Real mode exceptions actually use this too, but alternate   * instruction code patches (which end up in the common .text area) @@ -1441,7 +1552,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_interrupt)  	addi	r13, r13, 4  	mtspr	SPRN_SRR0, r13  	GET_SCRATCH0(r13) -	rfid +	RFI_TO_KERNEL  	b	.  TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt) @@ -1453,7 +1564,7 @@ TRAMP_REAL_BEGIN(kvmppc_skip_Hinterrupt)  	addi	r13, r13, 4  	mtspr	SPRN_HSRR0, r13  	GET_SCRATCH0(r13) -	hrfid +	HRFI_TO_KERNEL  	b	.  #endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 04ea5c04fd24..3c2c2688918f 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1462,25 +1462,6 @@ static void fadump_init_files(void)  	return;  } -static int fadump_panic_event(struct notifier_block *this, -			      unsigned long event, void *ptr) -{ -	/* -	 * If firmware-assisted dump has been registered then trigger -	 * firmware-assisted dump and let firmware handle everything -	 * else. If this returns, then fadump was not registered, so -	 * go through the rest of the panic path. 
-	 */ -	crash_fadump(NULL, ptr); - -	return NOTIFY_DONE; -} - -static struct notifier_block fadump_panic_block = { -	.notifier_call = fadump_panic_event, -	.priority = INT_MIN /* may not return; must be done last */ -}; -  /*   * Prepare for firmware-assisted dump.   */ @@ -1513,9 +1494,6 @@ int __init setup_fadump(void)  		init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);  	fadump_init_files(); -	atomic_notifier_chain_register(&panic_notifier_list, -					&fadump_panic_block); -  	return 1;  }  subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 8ac0bd2bddb0..3280953a82cf 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -623,7 +623,9 @@ BEGIN_FTR_SECTION  	 * NOTE, we rely on r0 being 0 from above.  	 */  	mtspr	SPRN_IAMR,r0 +BEGIN_FTR_SECTION_NESTED(42)  	mtspr	SPRN_AMOR,r0 +END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42)  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)  	/* save regs for local vars on new stack. 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bfdd783e3916..72be0c32e902 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1403,7 +1403,7 @@ void show_regs(struct pt_regs * regs)  	printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",  	       regs->nip, regs->link, regs->ctr); -	printk("REGS: %p TRAP: %04lx   %s  (%s)\n", +	printk("REGS: %px TRAP: %04lx   %s  (%s)\n",  	       regs, regs->trap, print_tainted(), init_utsname()->release);  	printk("MSR:  "REG" ", regs->msr);  	print_msr_bits(regs->msr); @@ -1569,16 +1569,22 @@ void arch_release_task_struct(struct task_struct *t)   */  int set_thread_tidr(struct task_struct *t)  { +	int rc; +  	if (!cpu_has_feature(CPU_FTR_ARCH_300))  		return -EINVAL;  	if (t != current)  		return -EINVAL; -	t->thread.tidr = assign_thread_tidr(); -	if (t->thread.tidr < 0) -		return t->thread.tidr; +	if (t->thread.tidr) +		return 0; + +	rc = assign_thread_tidr(); +	if (rc < 0) +		return rc; +	t->thread.tidr = rc;  	mtspr(SPRN_TIDR, t->thread.tidr);  	return 0; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 2075322cd225..8fd3a70047f1 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -242,14 +242,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)  	unsigned short maj;  	unsigned short min; -	/* We only show online cpus: disable preempt (overzealous, I -	 * knew) to prevent cpu going down. 
*/ -	preempt_disable(); -	if (!cpu_online(cpu_id)) { -		preempt_enable(); -		return 0; -	} -  #ifdef CONFIG_SMP  	pvr = per_cpu(cpu_pvr, cpu_id);  #else @@ -358,9 +350,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)  #ifdef CONFIG_SMP  	seq_printf(m, "\n");  #endif - -	preempt_enable(); -  	/* If this is the last cpu, print the summary */  	if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)  		show_cpuinfo_summary(m); @@ -704,6 +693,30 @@ int check_legacy_ioport(unsigned long base_port)  }  EXPORT_SYMBOL(check_legacy_ioport); +static int ppc_panic_event(struct notifier_block *this, +                             unsigned long event, void *ptr) +{ +	/* +	 * If firmware-assisted dump has been registered then trigger +	 * firmware-assisted dump and let firmware handle everything else. +	 */ +	crash_fadump(NULL, ptr); +	ppc_md.panic(ptr);  /* May not return */ +	return NOTIFY_DONE; +} + +static struct notifier_block ppc_panic_block = { +	.notifier_call = ppc_panic_event, +	.priority = INT_MIN /* may not return; must be done last */ +}; + +void __init setup_panic(void) +{ +	if (!ppc_md.panic) +		return; +	atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block); +} +  #ifdef CONFIG_CHECK_CACHE_COHERENCY  /*   * For platforms that have configurable cache-coherency.  This function @@ -848,6 +861,9 @@ void __init setup_arch(char **cmdline_p)  	/* Probe the machine type, establish ppc_md. */  	probe_machine(); +	/* Setup panic notifier if requested by the platform. */ +	setup_panic(); +  	/*  	 * Configure ppc_md.power_save (ppc32 only, 64-bit machines do  	 * it from their respective probe() function. 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 8956a9856604..e67413f4a8f0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -38,6 +38,7 @@  #include <linux/memory.h>  #include <linux/nmi.h> +#include <asm/debugfs.h>  #include <asm/io.h>  #include <asm/kdump.h>  #include <asm/prom.h> @@ -801,3 +802,141 @@ static int __init disable_hardlockup_detector(void)  	return 0;  }  early_initcall(disable_hardlockup_detector); + +#ifdef CONFIG_PPC_BOOK3S_64 +static enum l1d_flush_type enabled_flush_types; +static void *l1d_flush_fallback_area; +static bool no_rfi_flush; +bool rfi_flush; + +static int __init handle_no_rfi_flush(char *p) +{ +	pr_info("rfi-flush: disabled on command line."); +	no_rfi_flush = true; +	return 0; +} +early_param("no_rfi_flush", handle_no_rfi_flush); + +/* + * The RFI flush is not KPTI, but because users will see doco that says to use + * nopti we hijack that option here to also disable the RFI flush. + */ +static int __init handle_no_pti(char *p) +{ +	pr_info("rfi-flush: disabling due to 'nopti' on command line.\n"); +	handle_no_rfi_flush(NULL); +	return 0; +} +early_param("nopti", handle_no_pti); + +static void do_nothing(void *unused) +{ +	/* +	 * We don't need to do the flush explicitly, just enter+exit kernel is +	 * sufficient, the RFI exit handlers will do the right thing. +	 */ +} + +void rfi_flush_enable(bool enable) +{ +	if (rfi_flush == enable) +		return; + +	if (enable) { +		do_rfi_flush_fixups(enabled_flush_types); +		on_each_cpu(do_nothing, NULL, 1); +	} else +		do_rfi_flush_fixups(L1D_FLUSH_NONE); + +	rfi_flush = enable; +} + +static void init_fallback_flush(void) +{ +	u64 l1d_size, limit; +	int cpu; + +	l1d_size = ppc64_caches.l1d.size; +	limit = min(safe_stack_limit(), ppc64_rma_size); + +	/* +	 * Align to L1d size, and size it at 2x L1d size, to catch possible +	 * hardware prefetch runoff. 
We don't have a recipe for load patterns to +	 * reliably avoid the prefetcher. +	 */ +	l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2, l1d_size, limit)); +	memset(l1d_flush_fallback_area, 0, l1d_size * 2); + +	for_each_possible_cpu(cpu) { +		/* +		 * The fallback flush is currently coded for 8-way +		 * associativity. Different associativity is possible, but it +		 * will be treated as 8-way and may not evict the lines as +		 * effectively. +		 * +		 * 128 byte lines are mandatory. +		 */ +		u64 c = l1d_size / 8; + +		paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; +		paca[cpu].l1d_flush_congruence = c; +		paca[cpu].l1d_flush_sets = c / 128; +	} +} + +void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) +{ +	if (types & L1D_FLUSH_FALLBACK) { +		pr_info("rfi-flush: Using fallback displacement flush\n"); +		init_fallback_flush(); +	} + +	if (types & L1D_FLUSH_ORI) +		pr_info("rfi-flush: Using ori type flush\n"); + +	if (types & L1D_FLUSH_MTTRIG) +		pr_info("rfi-flush: Using mttrig type flush\n"); + +	enabled_flush_types = types; + +	if (!no_rfi_flush) +		rfi_flush_enable(enable); +} + +#ifdef CONFIG_DEBUG_FS +static int rfi_flush_set(void *data, u64 val) +{ +	if (val == 1) +		rfi_flush_enable(true); +	else if (val == 0) +		rfi_flush_enable(false); +	else +		return -EINVAL; + +	return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ +	*val = rfi_flush ? 
1 : 0; +	return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ +	debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); +	return 0; +} +device_initcall(rfi_flush_debugfs_init); +#endif + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ +	if (rfi_flush) +		return sprintf(buf, "Mitigation: RFI Flush\n"); + +	return sprintf(buf, "Vulnerable\n"); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 0494e1566ee2..307843d23682 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -132,6 +132,15 @@ SECTIONS  	/* Read-only data */  	RO_DATA(PAGE_SIZE) +#ifdef CONFIG_PPC64 +	. = ALIGN(8); +	__rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { +		__start___rfi_flush_fixup = .; +		*(__rfi_flush_fixup) +		__stop___rfi_flush_fixup = .; +	} +#endif +  	EXCEPTION_TABLE(0)  	NOTES :kernel :notes diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 29ebe2fd5867..a93d719edc90 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -235,6 +235,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,  		gpte->may_read = true;  		gpte->may_write = true;  		gpte->page_size = MMU_PAGE_4K; +		gpte->wimg = HPTE_R_M;  		return 0;  	} diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 235319c2574e..b73dbc9e797d 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -65,11 +65,17 @@ struct kvm_resize_hpt {  	u32 order;  	/* These fields protected by kvm->lock */ + +	/* Possible values and their usage: +	 *  <0     an error occurred during allocation, +	 *  -EBUSY allocation is in progress, +	 *  0      allocation made successfully. 
+	 */  	int error; -	bool prepare_done; -	/* Private to the work thread, until prepare_done is true, -	 * then protected by kvm->resize_hpt_sem */ +	/* Private to the work thread, until error != -EBUSY, +	 * then protected by kvm->lock. +	 */  	struct kvm_hpt_info hpt;  }; @@ -159,8 +165,6 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)  		 * Reset all the reverse-mapping chains for all memslots  		 */  		kvmppc_rmap_reset(kvm); -		/* Ensure that each vcpu will flush its TLB on next entry. */ -		cpumask_setall(&kvm->arch.need_tlb_flush);  		err = 0;  		goto out;  	} @@ -176,6 +180,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)  	kvmppc_set_hpt(kvm, &info);  out: +	if (err == 0) +		/* Ensure that each vcpu will flush its TLB on next entry. */ +		cpumask_setall(&kvm->arch.need_tlb_flush); +  	mutex_unlock(&kvm->lock);  	return err;  } @@ -1238,8 +1246,9 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,  	unsigned long vpte, rpte, guest_rpte;  	int ret;  	struct revmap_entry *rev; -	unsigned long apsize, psize, avpn, pteg, hash; +	unsigned long apsize, avpn, pteg, hash;  	unsigned long new_idx, new_pteg, replace_vpte; +	int pshift;  	hptep = (__be64 *)(old->virt + (idx << 4)); @@ -1298,8 +1307,8 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,  		goto out;  	rpte = be64_to_cpu(hptep[1]); -	psize = hpte_base_page_size(vpte, rpte); -	avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23); +	pshift = kvmppc_hpte_base_page_shift(vpte, rpte); +	avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);  	pteg = idx / HPTES_PER_GROUP;  	if (vpte & HPTE_V_SECONDARY)  		pteg = ~pteg; @@ -1311,20 +1320,20 @@ static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,  		offset = (avpn & 0x1f) << 23;  		vsid = avpn >> 5;  		/* We can find more bits from the pteg value */ -		if (psize < (1ULL << 23)) -			offset |= ((vsid ^ pteg) & old_hash_mask) * psize; +		if (pshift < 23) +			
offset |= ((vsid ^ pteg) & old_hash_mask) << pshift; -		hash = vsid ^ (offset / psize); +		hash = vsid ^ (offset >> pshift);  	} else {  		unsigned long offset, vsid;  		/* We only have 40 - 23 bits of seg_off in avpn */  		offset = (avpn & 0x1ffff) << 23;  		vsid = avpn >> 17; -		if (psize < (1ULL << 23)) -			offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) * psize; +		if (pshift < 23) +			offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift; -		hash = vsid ^ (vsid << 25) ^ (offset / psize); +		hash = vsid ^ (vsid << 25) ^ (offset >> pshift);  	}  	new_pteg = hash & new_hash_mask; @@ -1412,16 +1421,20 @@ static void resize_hpt_pivot(struct kvm_resize_hpt *resize)  static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)  { -	BUG_ON(kvm->arch.resize_hpt != resize); +	if (WARN_ON(!mutex_is_locked(&kvm->lock))) +		return;  	if (!resize)  		return; -	if (resize->hpt.virt) -		kvmppc_free_hpt(&resize->hpt); +	if (resize->error != -EBUSY) { +		if (resize->hpt.virt) +			kvmppc_free_hpt(&resize->hpt); +		kfree(resize); +	} -	kvm->arch.resize_hpt = NULL; -	kfree(resize); +	if (kvm->arch.resize_hpt == resize) +		kvm->arch.resize_hpt = NULL;  }  static void resize_hpt_prepare_work(struct work_struct *work) @@ -1430,17 +1443,41 @@ static void resize_hpt_prepare_work(struct work_struct *work)  						     struct kvm_resize_hpt,  						     work);  	struct kvm *kvm = resize->kvm; -	int err; +	int err = 0; -	resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", -			 resize->order); - -	err = resize_hpt_allocate(resize); +	if (WARN_ON(resize->error != -EBUSY)) +		return;  	mutex_lock(&kvm->lock); +	/* Request is still current? */ +	if (kvm->arch.resize_hpt == resize) { +		/* We may request large allocations here: +		 * do not sleep with kvm->lock held for a while. 
+		 */ +		mutex_unlock(&kvm->lock); + +		resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n", +				 resize->order); + +		err = resize_hpt_allocate(resize); + +		/* We have strict assumption about -EBUSY +		 * when preparing for HPT resize. +		 */ +		if (WARN_ON(err == -EBUSY)) +			err = -EINPROGRESS; + +		mutex_lock(&kvm->lock); +		/* It is possible that kvm->arch.resize_hpt != resize +		 * after we grab kvm->lock again. +		 */ +	} +  	resize->error = err; -	resize->prepare_done = true; + +	if (kvm->arch.resize_hpt != resize) +		resize_hpt_release(kvm, resize);  	mutex_unlock(&kvm->lock);  } @@ -1465,14 +1502,12 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,  	if (resize) {  		if (resize->order == shift) { -			/* Suitable resize in progress */ -			if (resize->prepare_done) { -				ret = resize->error; -				if (ret != 0) -					resize_hpt_release(kvm, resize); -			} else { +			/* Suitable resize in progress? */ +			ret = resize->error; +			if (ret == -EBUSY)  				ret = 100; /* estimated time in ms */ -			} +			else if (ret) +				resize_hpt_release(kvm, resize);  			goto out;  		} @@ -1492,6 +1527,8 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,  		ret = -ENOMEM;  		goto out;  	} + +	resize->error = -EBUSY;  	resize->order = shift;  	resize->kvm = kvm;  	INIT_WORK(&resize->work, resize_hpt_prepare_work); @@ -1546,16 +1583,12 @@ long kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,  	if (!resize || (resize->order != shift))  		goto out; -	ret = -EBUSY; -	if (!resize->prepare_done) -		goto out; -  	ret = resize->error; -	if (ret != 0) +	if (ret)  		goto out;  	ret = resize_hpt_rehash(resize); -	if (ret != 0) +	if (ret)  		goto out;  	resize_hpt_pivot(resize); @@ -1801,6 +1834,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,  	ssize_t nb;  	long int err, ret;  	int mmu_ready; +	int pshift;  	if (!access_ok(VERIFY_READ, buf, count))  		return -EFAULT; @@ -1855,6 +1889,9 @@ static ssize_t kvm_htab_write(struct 
file *file, const char __user *buf,  			err = -EINVAL;  			if (!(v & HPTE_V_VALID))  				goto out; +			pshift = kvmppc_hpte_base_page_shift(v, r); +			if (pshift <= 0) +				goto out;  			lbuf += 2;  			nb += HPTE_SIZE; @@ -1869,14 +1906,18 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,  				goto out;  			}  			if (!mmu_ready && is_vrma_hpte(v)) { -				unsigned long psize = hpte_base_page_size(v, r); -				unsigned long senc = slb_pgsize_encoding(psize); -				unsigned long lpcr; +				unsigned long senc, lpcr; +				senc = slb_pgsize_encoding(1ul << pshift);  				kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |  					(VRMA_VSID << SLB_VSID_SHIFT_1T); -				lpcr = senc << (LPCR_VRMASD_SH - 4); -				kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD); +				if (!cpu_has_feature(CPU_FTR_ARCH_300)) { +					lpcr = senc << (LPCR_VRMASD_SH - 4); +					kvmppc_update_lpcr(kvm, lpcr, +							   LPCR_VRMASD); +				} else { +					kvmppc_setup_partition_table(kvm); +				}  				mmu_ready = 1;  			}  			++i; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 79ea3d9269db..2d46037ce936 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -120,7 +120,6 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");  static void kvmppc_end_cede(struct kvm_vcpu *vcpu);  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); -static void kvmppc_setup_partition_table(struct kvm *kvm);  static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,  		int *ip) @@ -3574,7 +3573,7 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)  	return;  } -static void kvmppc_setup_partition_table(struct kvm *kvm) +void kvmppc_setup_partition_table(struct kvm *kvm)  {  	unsigned long dw0, dw1; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 2659844784b8..9c61f736c75b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -79,7 +79,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)  	mtmsrd	r0,1		/* clear RI in MSR */  	mtsrr0	r5  	mtsrr1	r6 -	RFI +	RFI_TO_KERNEL  kvmppc_call_hv_entry:  BEGIN_FTR_SECTION @@ -199,7 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)  	mtmsrd	r6, 1			/* Clear RI in MSR */  	mtsrr0	r8  	mtsrr1	r7 -	RFI +	RFI_TO_KERNEL  	/* Virtual-mode return */  .Lvirt_return: @@ -1167,8 +1167,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)  	ld	r0, VCPU_GPR(R0)(r4)  	ld	r4, VCPU_GPR(R4)(r4) - -	hrfid +	HRFI_TO_GUEST  	b	.  secondary_too_late: @@ -3320,7 +3319,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)  	ld	r4, PACAKMSR(r13)  	mtspr	SPRN_SRR0, r3  	mtspr	SPRN_SRR1, r4 -	rfid +	RFI_TO_KERNEL  9:	addi	r3, r1, STACK_FRAME_OVERHEAD  	bl	kvmppc_bad_interrupt  	b	9b diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d0dc8624198f..7deaeeb14b93 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -60,6 +60,7 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);  #define MSR_USER32 MSR_USER  #define MSR_USER64 MSR_USER  #define HW_PAGE_SIZE PAGE_SIZE +#define HPTE_R_M   _PAGE_COHERENT  #endif  static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu) @@ -557,6 +558,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,  		pte.eaddr = eaddr;  		pte.vpage = eaddr >> 12;  		pte.page_size = MMU_PAGE_64K; +		pte.wimg = HPTE_R_M;  	}  	switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) { diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 42a4b237df5f..34a5adeff084 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -46,6 +46,9 @@  #define FUNC(name)		name +#define RFI_TO_KERNEL	RFI +#define RFI_TO_GUEST	RFI +  .macro INTERRUPT_TRAMPOLINE intno  .global kvmppc_trampoline_\intno @@ -141,7 +144,7 @@ kvmppc_handler_skip_ins:  	GET_SCRATCH0(r13)  	/* And get back into the 
code */ -	RFI +	RFI_TO_KERNEL  #endif  /* @@ -164,6 +167,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)  	ori	r5, r5, MSR_EE  	mtsrr0	r7  	mtsrr1	r6 -	RFI +	RFI_TO_KERNEL  #include "book3s_segment.S" diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index 2a2b96d53999..93a180ceefad 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -156,7 +156,7 @@ no_dcbz32_on:  	PPC_LL	r9, SVCPU_R9(r3)  	PPC_LL	r3, (SVCPU_R3)(r3) -	RFI +	RFI_TO_GUEST  kvmppc_handler_trampoline_enter_end: @@ -407,5 +407,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)  	cmpwi	r12, BOOK3S_INTERRUPT_DOORBELL  	beqa	BOOK3S_INTERRUPT_DOORBELL -	RFI +	RFI_TO_KERNEL  kvmppc_handler_trampoline_exit_end: diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index bf457843e032..0d750d274c4e 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -725,7 +725,8 @@ u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)  	/* Return the per-cpu state for state saving/migration */  	return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | -	       (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT; +	       (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT | +	       (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;  }  int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) @@ -1558,7 +1559,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)  	/*  	 * Restore P and Q. If the interrupt was pending, we -	 * force both P and Q, which will trigger a resend. +	 * force Q and !P, which will trigger a resend.  	 *  	 * That means that a guest that had both an interrupt  	 * pending (queued) and Q set will restore with only @@ -1566,7 +1567,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)  	 * is perfectly fine as coalescing interrupts that haven't  	 * been presented yet is always allowed.  	 
*/ -	if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING) +	if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))  		state->old_p = true;  	if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)  		state->old_q = true; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6b6c53c42ac9..0a7c88786ec0 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -39,6 +39,10 @@  #include <asm/iommu.h>  #include <asm/switch_to.h>  #include <asm/xive.h> +#ifdef CONFIG_PPC_PSERIES +#include <asm/hvcall.h> +#include <asm/plpar_wrappers.h> +#endif  #include "timing.h"  #include "irq.h" @@ -548,6 +552,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)  #ifdef CONFIG_KVM_XICS  	case KVM_CAP_IRQ_XICS:  #endif +	case KVM_CAP_PPC_GET_CPU_CHAR:  		r = 1;  		break; @@ -1407,7 +1412,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)  {  	int r; -	sigset_t sigsaved;  	if (vcpu->mmio_needed) {  		vcpu->mmio_needed = 0; @@ -1448,16 +1452,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)  #endif  	} -	if (vcpu->sigset_active) -		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); +	kvm_sigset_activate(vcpu);  	if (run->immediate_exit)  		r = -EINTR;  	else  		r = kvmppc_vcpu_run(run, vcpu); -	if (vcpu->sigset_active) -		sigprocmask(SIG_SETMASK, &sigsaved, NULL); +	kvm_sigset_deactivate(vcpu);  	return r;  } @@ -1762,6 +1764,124 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,  	return r;  } +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * These functions check whether the underlying hardware is safe + * against attacks based on observing the effects of speculatively + * executed instructions, and whether it supplies instructions for + * use in workarounds.  The information comes from firmware, either + * via the device tree on powernv platforms or from an hcall on + * pseries platforms. 
+ */ +#ifdef CONFIG_PPC_PSERIES +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ +	struct h_cpu_char_result c; +	unsigned long rc; + +	if (!machine_is(pseries)) +		return -ENOTTY; + +	rc = plpar_get_cpu_characteristics(&c); +	if (rc == H_SUCCESS) { +		cp->character = c.character; +		cp->behaviour = c.behaviour; +		cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | +			KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | +			KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | +			KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | +			KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | +			KVM_PPC_CPU_CHAR_BR_HINT_HONOURED | +			KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF | +			KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; +		cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | +			KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | +			KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; +	} +	return 0; +} +#else +static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ +	return -ENOTTY; +} +#endif + +static inline bool have_fw_feat(struct device_node *fw_features, +				const char *state, const char *name) +{ +	struct device_node *np; +	bool r = false; + +	np = of_get_child_by_name(fw_features, name); +	if (np) { +		r = of_property_read_bool(np, state); +		of_node_put(np); +	} +	return r; +} + +static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp) +{ +	struct device_node *np, *fw_features; +	int r; + +	memset(cp, 0, sizeof(*cp)); +	r = pseries_get_cpu_char(cp); +	if (r != -ENOTTY) +		return r; + +	np = of_find_node_by_name(NULL, "ibm,opal"); +	if (np) { +		fw_features = of_get_child_by_name(np, "fw-features"); +		of_node_put(np); +		if (!fw_features) +			return 0; +		if (have_fw_feat(fw_features, "enabled", +				 "inst-spec-barrier-ori31,31,0")) +			cp->character |= KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31; +		if (have_fw_feat(fw_features, "enabled", +				 "fw-bcctrl-serialized")) +			cp->character |= KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED; +		if (have_fw_feat(fw_features, "enabled", +				 "inst-l1d-flush-ori30,30,0")) +			cp->character |= 
KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30; +		if (have_fw_feat(fw_features, "enabled", +				 "inst-l1d-flush-trig2")) +			cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2; +		if (have_fw_feat(fw_features, "enabled", +				 "fw-l1d-thread-split")) +			cp->character |= KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV; +		if (have_fw_feat(fw_features, "enabled", +				 "fw-count-cache-disabled")) +			cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; +		cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 | +			KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED | +			KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 | +			KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 | +			KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV | +			KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS; + +		if (have_fw_feat(fw_features, "enabled", +				 "speculation-policy-favor-security")) +			cp->behaviour |= KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY; +		if (!have_fw_feat(fw_features, "disabled", +				  "needs-l1d-flush-msr-pr-0-to-1")) +			cp->behaviour |= KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR; +		if (!have_fw_feat(fw_features, "disabled", +				  "needs-spec-barrier-for-bound-checks")) +			cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; +		cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY | +			KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR | +			KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + +		of_node_put(fw_features); +	} + +	return 0; +} +#endif +  long kvm_arch_vm_ioctl(struct file *filp,                         unsigned int ioctl, unsigned long arg)  { @@ -1864,6 +1984,14 @@ long kvm_arch_vm_ioctl(struct file *filp,  			r = -EFAULT;  		break;  	} +	case KVM_PPC_GET_CPU_CHAR: { +		struct kvm_ppc_cpu_char cpuchar; + +		r = kvmppc_get_cpu_char(&cpuchar); +		if (r >= 0 && copy_to_user(argp, &cpuchar, sizeof(cpuchar))) +			r = -EFAULT; +		break; +	}  	default: {  		struct kvm *kvm = filp->private_data;  		r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg); diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 41cf5ae273cf..a95ea007d654 100644 --- a/arch/powerpc/lib/feature-fixups.c 
+++ b/arch/powerpc/lib/feature-fixups.c @@ -116,6 +116,47 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)  	}  } +#ifdef CONFIG_PPC_BOOK3S_64 +void do_rfi_flush_fixups(enum l1d_flush_type types) +{ +	unsigned int instrs[3], *dest; +	long *start, *end; +	int i; + +	start = PTRRELOC(&__start___rfi_flush_fixup), +	end = PTRRELOC(&__stop___rfi_flush_fixup); + +	instrs[0] = 0x60000000; /* nop */ +	instrs[1] = 0x60000000; /* nop */ +	instrs[2] = 0x60000000; /* nop */ + +	if (types & L1D_FLUSH_FALLBACK) +		/* b .+16 to fallback flush */ +		instrs[0] = 0x48000010; + +	i = 0; +	if (types & L1D_FLUSH_ORI) { +		instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ +		instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ +	} + +	if (types & L1D_FLUSH_MTTRIG) +		instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + +	for (i = 0; start < end; start++, i++) { +		dest = (void *)start + *start; + +		pr_devel("patching dest %lx\n", (unsigned long)dest); + +		patch_instruction(dest, instrs[0]); +		patch_instruction(dest + 1, instrs[1]); +		patch_instruction(dest + 2, instrs[2]); +	} + +	printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); +} +#endif /* CONFIG_PPC_BOOK3S_64 */ +  void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)  {  	long *start, *end; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 4797d08581ce..6e1e39035380 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -145,6 +145,11 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)  	return __bad_area(regs, address, SEGV_MAPERR);  } +static noinline int bad_access(struct pt_regs *regs, unsigned long address) +{ +	return __bad_area(regs, address, SEGV_ACCERR); +} +  static int do_sigbus(struct pt_regs *regs, unsigned long address,  		     unsigned int fault)  { @@ -490,7 +495,7 @@ retry:  good_area:  	if (unlikely(access_error(is_write, is_exec, vma))) -		return bad_area(regs, 
address); +		return bad_access(regs, address);  	/*  	 * If for any reason at all we couldn't handle the fault, diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 3848af167df9..640cf566e986 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -47,7 +47,8 @@  DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) +static inline unsigned long  ___tlbie(unsigned long vpn, int psize, +						int apsize, int ssize)  {  	unsigned long va;  	unsigned int penc; @@ -100,7 +101,15 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)  			     : "memory");  		break;  	} -	trace_tlbie(0, 0, va, 0, 0, 0, 0); +	return va; +} + +static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) +{ +	unsigned long rb; + +	rb = ___tlbie(vpn, psize, apsize, ssize); +	trace_tlbie(0, 0, rb, 0, 0, 0, 0);  }  static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) @@ -652,7 +661,7 @@ static void native_hpte_clear(void)  		if (hpte_v & HPTE_V_VALID) {  			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);  			hptep->v = 0; -			__tlbie(vpn, psize, apsize, ssize); +			___tlbie(vpn, psize, apsize, ssize);  		}  	} diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 46d74e81aff1..d183b4801bdb 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -763,7 +763,8 @@ emit_clear:  			func = (u8 *) __bpf_call_base + imm;  			/* Save skb pointer if we need to re-cache skb data */ -			if (bpf_helper_changes_pkt_data(func)) +			if ((ctx->seen & SEEN_SKB) && +			    bpf_helper_changes_pkt_data(func))  				PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));  			bpf_jit_emit_func_call(image, ctx, (u64)func); @@ -772,7 +773,8 @@ emit_clear:  			PPC_MR(b2p[BPF_REG_0], 3);  			/* refresh skb cache */ -			if 
(bpf_helper_changes_pkt_data(func)) { +			if ((ctx->seen & SEEN_SKB) && +			    bpf_helper_changes_pkt_data(func)) {  				/* reload skb pointer to r3 */  				PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));  				bpf_jit_emit_skb_loads(image, ctx); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 9e3da168d54c..fce545774d50 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -410,8 +410,12 @@ static __u64 power_pmu_bhrb_to(u64 addr)  	int ret;  	__u64 target; -	if (is_kernel_addr(addr)) -		return branch_target((unsigned int *)addr); +	if (is_kernel_addr(addr)) { +		if (probe_kernel_read(&instr, (void *)addr, sizeof(instr))) +			return 0; + +		return branch_target(&instr); +	}  	/* Userspace: need copy instruction here then translate it */  	pagefault_disable(); @@ -1415,7 +1419,7 @@ static int collect_events(struct perf_event *group, int max_count,  	int n = 0;  	struct perf_event *event; -	if (!is_software_event(group)) { +	if (group->pmu->task_ctx_nr == perf_hw_context) {  		if (n >= max_count)  			return -1;  		ctrs[n] = group; @@ -1423,7 +1427,7 @@ static int collect_events(struct perf_event *group, int max_count,  		events[n++] = group->hw.config;  	}  	list_for_each_entry(event, &group->sibling_list, group_entry) { -		if (!is_software_event(event) && +		if (event->pmu->task_ctx_nr == perf_hw_context &&  		    event->state != PERF_EVENT_STATE_OFF) {  			if (n >= max_count)  				return -1; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 0ead3cd73caa..be4e7f84f70a 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -310,6 +310,19 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu)  		return 0;  	/* +	 * Check whether nest_imc is registered. We could end up here if the +	 * cpuhotplug callback registration fails. i.e, callback invokes the +	 * offline path for all successfully registered nodes. 
At this stage, +	 * nest_imc pmu will not be registered and we should return here. +	 * +	 * We return with a zero since this is not an offline failure. And +	 * cpuhp_setup_state() returns the actual failure reason to the caller, +	 * which in turn will call the cleanup routine. +	 */ +	if (!nest_pmus) +		return 0; + +	/*  	 * Now that this cpu is one of the designated,  	 * find a next cpu a) which is online and b) in same chip.  	 */ @@ -1171,6 +1184,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)  		if (nest_pmus == 1) {  			cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);  			kfree(nest_imc_refc); +			kfree(per_nest_pmu_arr);  		}  		if (nest_pmus > 0) @@ -1195,7 +1209,6 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)  		kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);  	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);  	kfree(pmu_ptr); -	kfree(per_nest_pmu_arr);  	return;  } @@ -1309,6 +1322,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id  			ret = nest_pmu_cpumask_init();  			if (ret) {  				mutex_unlock(&nest_init_lock); +				kfree(nest_imc_refc); +				kfree(per_nest_pmu_arr);  				goto err_free;  			}  		} diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 1edfbc1e40f4..4fb21e17504a 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -37,13 +37,62 @@  #include <asm/kexec.h>  #include <asm/smp.h>  #include <asm/tm.h> +#include <asm/setup.h>  #include "powernv.h" +static void pnv_setup_rfi_flush(void) +{ +	struct device_node *np, *fw_features; +	enum l1d_flush_type type; +	int enable; + +	/* Default to fallback in case fw-features are not available */ +	type = L1D_FLUSH_FALLBACK; +	enable = 1; + +	np = of_find_node_by_name(NULL, "ibm,opal"); +	fw_features = of_get_child_by_name(np, "fw-features"); +	of_node_put(np); + +	if (fw_features) { +		np = of_get_child_by_name(fw_features, 
"inst-l1d-flush-trig2"); +		if (np && of_property_read_bool(np, "enabled")) +			type = L1D_FLUSH_MTTRIG; + +		of_node_put(np); + +		np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); +		if (np && of_property_read_bool(np, "enabled")) +			type = L1D_FLUSH_ORI; + +		of_node_put(np); + +		/* Enable unless firmware says NOT to */ +		enable = 2; +		np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); +		if (np && of_property_read_bool(np, "disabled")) +			enable--; + +		of_node_put(np); + +		np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); +		if (np && of_property_read_bool(np, "disabled")) +			enable--; + +		of_node_put(np); +		of_node_put(fw_features); +	} + +	setup_rfi_flush(type, enable > 0); +} +  static void __init pnv_setup_arch(void)  {  	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); +	pnv_setup_rfi_flush(); +  	/* Initialize SMP */  	pnv_smp_init(); diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 9dabea6e1443..6244bc849469 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -104,6 +104,20 @@ static void __noreturn ps3_halt(void)  	ps3_sys_manager_halt(); /* never returns */  } +static void ps3_panic(char *str) +{ +	DBG("%s:%d %s\n", __func__, __LINE__, str); + +	smp_send_stop(); +	printk("\n"); +	printk("   System does not reboot automatically.\n"); +	printk("   Please press POWER button.\n"); +	printk("\n"); + +	while(1) +		lv1_pause(1); +} +  #if defined(CONFIG_FB_PS3) || defined(CONFIG_FB_PS3_MODULE) || \      defined(CONFIG_PS3_FLASH) || defined(CONFIG_PS3_FLASH_MODULE)  static void __init prealloc(struct ps3_prealloc *p) @@ -255,6 +269,7 @@ define_machine(ps3) {  	.probe				= ps3_probe,  	.setup_arch			= ps3_setup_arch,  	.init_IRQ			= ps3_init_IRQ, +	.panic				= ps3_panic,  	.get_boot_time			= ps3_get_boot_time,  	.set_dabr			= ps3_set_dabr,  	.calibrate_decr			= ps3_calibrate_decr, diff --git 
a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 6e35780c5962..a0b20c03f078 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -574,11 +574,26 @@ static ssize_t dlpar_show(struct class *class, struct class_attribute *attr,  static CLASS_ATTR_RW(dlpar); -static int __init pseries_dlpar_init(void) +int __init dlpar_workqueue_init(void)  { +	if (pseries_hp_wq) +		return 0; +  	pseries_hp_wq = alloc_workqueue("pseries hotplug workqueue", -					WQ_UNBOUND, 1); +			WQ_UNBOUND, 1); + +	return pseries_hp_wq ? 0 : -ENOMEM; +} + +static int __init dlpar_sysfs_init(void) +{ +	int rc; + +	rc = dlpar_workqueue_init(); +	if (rc) +		return rc; +  	return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);  } -machine_device_initcall(pseries, pseries_dlpar_init); +machine_device_initcall(pseries, dlpar_sysfs_init); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 4470a3194311..1ae1d9f4dbe9 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -98,4 +98,6 @@ static inline unsigned long cmo_get_page_size(void)  	return CMO_PageSize;  } +int dlpar_workqueue_init(void); +  #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 4923ffe230cf..81d8614e7379 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -69,7 +69,8 @@ static int __init init_ras_IRQ(void)  	/* Hotplug Events */  	np = of_find_node_by_path("/event-sources/hot-plug-events");  	if (np != NULL) { -		request_event_sources_irqs(np, ras_hotplug_interrupt, +		if (dlpar_workqueue_init() == 0) +			request_event_sources_irqs(np, ras_hotplug_interrupt,  					   "RAS_HOTPLUG");  		of_node_put(np);  	} diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 5f1beb8367ac..ae4f596273b5 
100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -459,6 +459,39 @@ static void __init find_and_init_phbs(void)  	of_pci_check_probe_only();  } +static void pseries_setup_rfi_flush(void) +{ +	struct h_cpu_char_result result; +	enum l1d_flush_type types; +	bool enable; +	long rc; + +	/* Enable by default */ +	enable = true; + +	rc = plpar_get_cpu_characteristics(&result); +	if (rc == H_SUCCESS) { +		types = L1D_FLUSH_NONE; + +		if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) +			types |= L1D_FLUSH_MTTRIG; +		if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) +			types |= L1D_FLUSH_ORI; + +		/* Use fallback if nothing set in hcall */ +		if (types == L1D_FLUSH_NONE) +			types = L1D_FLUSH_FALLBACK; + +		if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) +			enable = false; +	} else { +		/* Default to fallback in case hcall is not available */ +		types = L1D_FLUSH_FALLBACK; +	} + +	setup_rfi_flush(types, enable); +} +  static void __init pSeries_setup_arch(void)  {  	set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); @@ -476,6 +509,8 @@ static void __init pSeries_setup_arch(void)  	fwnmi_init(); +	pseries_setup_rfi_flush(); +  	/* By default, only probe PCI (can be overridden by rtas_pci) */  	pci_add_flags(PCI_PROBE_ONLY); @@ -726,6 +761,7 @@ define_machine(pseries) {  	.pcibios_fixup		= pSeries_final_fixup,  	.restart		= rtas_restart,  	.halt			= rtas_halt, +	.panic			= rtas_os_term,  	.get_boot_time		= rtas_get_boot_time,  	.get_rtc_time		= rtas_get_rtc_time,  	.set_rtc_time		= rtas_set_rtc_time, diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 44cbf4c12ea1..df95102e732c 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -354,6 +354,7 @@ static int fsl_of_msi_remove(struct platform_device *ofdev)  }  static struct lock_class_key fsl_msi_irq_class; +static struct lock_class_key fsl_msi_irq_request_class;  static int fsl_msi_setup_hwirq(struct fsl_msi *msi, 
struct platform_device *dev,  			       int offset, int irq_index) @@ -373,7 +374,8 @@ static int fsl_msi_setup_hwirq(struct fsl_msi *msi, struct platform_device *dev,  		dev_err(&dev->dev, "No memory for MSI cascade data\n");  		return -ENOMEM;  	} -	irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class); +	irq_set_lockdep_class(virt_msir, &fsl_msi_irq_class, +			      &fsl_msi_irq_request_class);  	cascade_data->index = offset;  	cascade_data->msi_data = msi;  	cascade_data->virq = virt_msir; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 1b2d8cb49abb..0ddc7ac6c5f1 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1590,7 +1590,7 @@ static void print_bug_trap(struct pt_regs *regs)  	printf("kernel BUG at %s:%u!\n",  	       bug->file, bug->line);  #else -	printf("kernel BUG at %p!\n", (void *)bug->bug_addr); +	printf("kernel BUG at %px!\n", (void *)bug->bug_addr);  #endif  #endif /* CONFIG_BUG */  } @@ -2329,7 +2329,7 @@ static void dump_one_paca(int cpu)  	p = &paca[cpu]; -	printf("paca for cpu 0x%x @ %p:\n", cpu, p); +	printf("paca for cpu 0x%x @ %px:\n", cpu, p);  	printf(" %-*s = %s\n", 20, "possible", cpu_possible(cpu) ? "yes" : "no");  	printf(" %-*s = %s\n", 20, "present", cpu_present(cpu) ? 
"yes" : "no"); @@ -2344,10 +2344,10 @@ static void dump_one_paca(int cpu)  	DUMP(p, kernel_toc, "lx");  	DUMP(p, kernelbase, "lx");  	DUMP(p, kernel_msr, "lx"); -	DUMP(p, emergency_sp, "p"); +	DUMP(p, emergency_sp, "px");  #ifdef CONFIG_PPC_BOOK3S_64 -	DUMP(p, nmi_emergency_sp, "p"); -	DUMP(p, mc_emergency_sp, "p"); +	DUMP(p, nmi_emergency_sp, "px"); +	DUMP(p, mc_emergency_sp, "px");  	DUMP(p, in_nmi, "x");  	DUMP(p, in_mce, "x");  	DUMP(p, hmi_event_available, "x"); @@ -2375,17 +2375,21 @@ static void dump_one_paca(int cpu)  	DUMP(p, slb_cache_ptr, "x");  	for (i = 0; i < SLB_CACHE_ENTRIES; i++)  		printf(" slb_cache[%d]:        = 0x%016lx\n", i, p->slb_cache[i]); + +	DUMP(p, rfi_flush_fallback_area, "px"); +	DUMP(p, l1d_flush_congruence, "llx"); +	DUMP(p, l1d_flush_sets, "llx");  #endif  	DUMP(p, dscr_default, "llx");  #ifdef CONFIG_PPC_BOOK3E -	DUMP(p, pgd, "p"); -	DUMP(p, kernel_pgd, "p"); -	DUMP(p, tcd_ptr, "p"); -	DUMP(p, mc_kstack, "p"); -	DUMP(p, crit_kstack, "p"); -	DUMP(p, dbg_kstack, "p"); +	DUMP(p, pgd, "px"); +	DUMP(p, kernel_pgd, "px"); +	DUMP(p, tcd_ptr, "px"); +	DUMP(p, mc_kstack, "px"); +	DUMP(p, crit_kstack, "px"); +	DUMP(p, dbg_kstack, "px");  #endif -	DUMP(p, __current, "p"); +	DUMP(p, __current, "px");  	DUMP(p, kstack, "lx");  	printf(" kstack_base          = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1));  	DUMP(p, stab_rr, "lx"); @@ -2403,7 +2407,7 @@ static void dump_one_paca(int cpu)  #endif  #ifdef CONFIG_PPC_POWERNV -	DUMP(p, core_idle_state_ptr, "p"); +	DUMP(p, core_idle_state_ptr, "px");  	DUMP(p, thread_idle_state, "x");  	DUMP(p, thread_mask, "x");  	DUMP(p, subcore_sibling_mask, "x"); @@ -2945,7 +2949,7 @@ static void show_task(struct task_struct *tsk)  		(tsk->exit_state & EXIT_DEAD) ? 'E' :  		(tsk->state & TASK_INTERRUPTIBLE) ? 
'S' : '?'; -	printf("%p %016lx %6d %6d %c %2d %s\n", tsk, +	printf("%px %016lx %6d %6d %c %2d %s\n", tsk,  		tsk->thread.ksp,  		tsk->pid, tsk->parent->pid,  		state, task_thread_info(tsk)->cpu, @@ -2988,7 +2992,7 @@ static void show_pte(unsigned long addr)  	if (setjmp(bus_error_jmp) != 0) {  		catch_memory_errors = 0; -		printf("*** Error dumping pte for task %p\n", tsk); +		printf("*** Error dumping pte for task %px\n", tsk);  		return;  	} @@ -3074,7 +3078,7 @@ static void show_tasks(void)  	if (setjmp(bus_error_jmp) != 0) {  		catch_memory_errors = 0; -		printf("*** Error dumping task %p\n", tsk); +		printf("*** Error dumping task %px\n", tsk);  		return;  	} |