Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/apm_32.c                       |  1
-rw-r--r--  arch/x86/kernel/cpu/intel.c                    | 22
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c          | 12
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c               | 10
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c               |  3
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h               | 12
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c         | 78
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c      |  6
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c  | 11
-rw-r--r--  arch/x86/kernel/entry_32.S                     |  9
-rw-r--r--  arch/x86/kernel/espfix_64.c                    |  5
-rw-r--r--  arch/x86/kernel/kprobes/core.c                 |  3
-rw-r--r--  arch/x86/kernel/tsc.c                          |  4
13 files changed, 138 insertions, 38 deletions
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f3a1f04ed4cb..584874451414 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -841,7 +841,6 @@ static int apm_do_idle(void)
 	u32 eax;
 	u8 ret = 0;
 	int idled = 0;
-	int polling;
 	int err = 0;

 	if (!need_resched()) {
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a80029035bf2..f9e4fdd3b877 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -370,6 +370,17 @@ static void init_intel(struct cpuinfo_x86 *c)
 	 */
 	detect_extended_topology(c);

+	if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
+		/*
+		 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
+		 * detection.
+		 */
+		c->x86_max_cores = intel_num_cpu_cores(c);
+#ifdef CONFIG_X86_32
+		detect_ht(c);
+#endif
+	}
+
 	l2 = init_intel_cacheinfo(c);
 	if (c->cpuid_level > 9) {
 		unsigned eax = cpuid_eax(10);
@@ -438,17 +449,6 @@ static void init_intel(struct cpuinfo_x86 *c)
 		set_cpu_cap(c, X86_FEATURE_P3);
 #endif

-	if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
-		/*
-		 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
-		 * detection.
-		 */
-		c->x86_max_cores = intel_num_cpu_cores(c);
-#ifdef CONFIG_X86_32
-		detect_ht(c);
-#endif
-	}
-
 	/* Work around errata */
 	srat_detect_node(c);

diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index a952e9c85b6f..9c8f7394c612 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -730,6 +730,18 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
 #endif
 	}

+#ifdef CONFIG_X86_HT
+	/*
+	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
+	 * turns means that the only possibility is SMT (as indicated in
+	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
+	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
+	 * c->phys_proc_id.
+	 */
+	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
+		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
+#endif
+
 	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

 	return l2;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index bb92f38153b2..9a79c8dbd8e8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2451,6 +2451,12 @@ static __init int mcheck_init_device(void)
 	for_each_online_cpu(i) {
 		err = mce_device_create(i);
 		if (err) {
+			/*
+			 * Register notifier anyway (and do not unreg it) so
+			 * that we don't leave undeleted timers, see notifier
+			 * callback above.
+			 */
+			__register_hotcpu_notifier(&mce_cpu_notifier);
 			cpu_notifier_register_done();
 			goto err_device_create;
 		}
@@ -2471,10 +2477,6 @@ static __init int mcheck_init_device(void)
 err_register:
 	unregister_syscore_ops(&mce_syscore_ops);

-	cpu_notifier_register_begin();
-	__unregister_hotcpu_notifier(&mce_cpu_notifier);
-	cpu_notifier_register_done();
-
 err_device_create:
 	/*
 	 * We didn't keep track of which devices were created above, but
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2bdfbff8a4f6..2879ecdaac43 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -118,6 +118,9 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
 			continue;
 		if (event->attr.config1 & ~er->valid_mask)
 			return -EINVAL;
+		/* Check if the extra msrs can be safely accessed*/
+		if (!er->extra_msr_access)
+			return -ENXIO;

 		reg->idx = er->idx;
 		reg->config = event->attr.config1;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 3b2f9bdd974b..8ade93111e03 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -295,14 +295,16 @@ struct extra_reg {
 	u64			config_mask;
 	u64			valid_mask;
 	int			idx;  /* per_xxx->regs[] reg index */
+	bool			extra_msr_access;
 };

 #define EVENT_EXTRA_REG(e, ms, m, vm, i) {	\
-	.event = (e),		\
-	.msr = (ms),		\
-	.config_mask = (m),	\
-	.valid_mask = (vm),	\
-	.idx = EXTRA_REG_##i,	\
+	.event = (e),			\
+	.msr = (ms),			\
+	.config_mask = (m),		\
+	.valid_mask = (vm),		\
+	.idx = EXTRA_REG_##i,		\
+	.extra_msr_access = true,	\
 	}

 #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)	\
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index adb02aa62af5..2502d0d9d246 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1382,6 +1382,15 @@ again:
 	intel_pmu_lbr_read();

 	/*
+	 * CondChgd bit 63 doesn't mean any overflow status. Ignore
+	 * and clear the bit.
+	 */
+	if (__test_and_clear_bit(63, (unsigned long *)&status)) {
+		if (!status)
+			goto done;
+	}
+
+	/*
 	 * PEBS overflow sets bit 62 in the global status register
 	 */
 	if (__test_and_clear_bit(62, (unsigned long *)&status)) {
@@ -2173,6 +2182,41 @@ static void intel_snb_check_microcode(void)
 	}
 }

+/*
+ * Under certain circumstances, access certain MSR may cause #GP.
+ * The function tests if the input MSR can be safely accessed.
+ */
+static bool check_msr(unsigned long msr, u64 mask)
+{
+	u64 val_old, val_new, val_tmp;
+
+	/*
+	 * Read the current value, change it and read it back to see if it
+	 * matches, this is needed to detect certain hardware emulators
+	 * (qemu/kvm) that don't trap on the MSR access and always return 0s.
+	 */
+	if (rdmsrl_safe(msr, &val_old))
+		return false;
+
+	/*
+	 * Only change the bits which can be updated by wrmsrl.
+	 */
+	val_tmp = val_old ^ mask;
+	if (wrmsrl_safe(msr, val_tmp) ||
+	    rdmsrl_safe(msr, &val_new))
+		return false;
+
+	if (val_new != val_tmp)
+		return false;
+
+	/* Here it's sure that the MSR can be safely accessed.
+	 * Restore the old value and return.
+	 */
+	wrmsrl(msr, val_old);
+
+	return true;
+}
+
 static __init void intel_sandybridge_quirk(void)
 {
 	x86_pmu.check_microcode = intel_snb_check_microcode;
@@ -2262,7 +2306,8 @@ __init int intel_pmu_init(void)
 	union cpuid10_ebx ebx;
 	struct event_constraint *c;
 	unsigned int unused;
-	int version;
+	struct extra_reg *er;
+	int version, i;

 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
 		switch (boot_cpu_data.x86) {
@@ -2465,6 +2510,9 @@ __init int intel_pmu_init(void)
 	case 62: /* IvyBridge EP */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		/* dTLB-load-misses on IVB is different than SNB */
+		hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
+
 		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
 		       sizeof(hw_cache_extra_regs));

@@ -2565,6 +2613,34 @@ __init int intel_pmu_init(void)
 		}
 	}

+	/*
+	 * Access LBR MSR may cause #GP under certain circumstances.
+	 * E.g. KVM doesn't support LBR MSR
+	 * Check all LBT MSR here.
+	 * Disable LBR access if any LBR MSRs can not be accessed.
+	 */
+	if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
+		x86_pmu.lbr_nr = 0;
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
+		      check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
+			x86_pmu.lbr_nr = 0;
+	}
+
+	/*
+	 * Access extra MSR may cause #GP under certain circumstances.
+	 * E.g. KVM doesn't support offcore event
+	 * Check all extra_regs here.
+	 */
+	if (x86_pmu.extra_regs) {
+		for (er = x86_pmu.extra_regs; er->msr; er++) {
+			er->extra_msr_access = check_msr(er->msr, 0x1ffUL);
+			/* Disable LBR select mapping */
+			if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
+				x86_pmu.lbr_sel_map = NULL;
+		}
+	}
+
 	/* Support full width counters using alternative MSR range */
 	if (x86_pmu.intel_cap.full_width_write) {
 		x86_pmu.max_period = x86_pmu.cntval_mask;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 980970cb744d..696ade311ded 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -311,9 +311,11 @@ static int alloc_bts_buffer(int cpu)
 	if (!x86_pmu.bts)
 		return 0;

-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL, node);
-	if (unlikely(!buffer))
+	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	if (unlikely(!buffer)) {
+		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
+	}

 	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
 	thresh = max / 16;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 65bbbea38b9c..ae6552a0701f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -550,16 +550,16 @@ static struct extra_reg snbep_uncore_cbox_extra_regs[] = {
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8),
-	SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xc),
-	SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xc),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4135, 0xffff, 0xa),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4335, 0xffff, 0xa),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4435, 0xffff, 0x2),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4835, 0xffff, 0x2),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4a35, 0xffff, 0x2),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x5035, 0xffff, 0x2),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x8),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x8),
-	SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xc),
-	SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xc),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0xa),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0xa),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4436, 0xffff, 0x2),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4836, 0xffff, 0x2),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4a36, 0xffff, 0x2),
@@ -1222,6 +1222,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
 	SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN,
 				  SNBEP_CBO_PMON_CTL_TID_EN, 0x1),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2),
+
 	SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc),
@@ -1245,7 +1246,7 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = {
 	SNBEP_CBO_EVENT_EXTRA_REG(0x8335, 0xffff, 0x10),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0136, 0xffff, 0x10),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x0336, 0xffff, 0x10),
-	SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x2136, 0xffff, 0x10),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x2336, 0xffff, 0x10),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4136, 0xffff, 0x18),
 	SNBEP_CBO_EVENT_EXTRA_REG(0x4336, 0xffff, 0x18),
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index dbaa23e78b36..0d0c9d4ab6d5 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -425,8 +425,8 @@ sysenter_do_call:
 	cmpl $(NR_syscalls), %eax
 	jae sysenter_badsys
 	call *sys_call_table(,%eax,4)
-	movl %eax,PT_EAX(%esp)
 sysenter_after_call:
+	movl %eax,PT_EAX(%esp)
 	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
@@ -502,6 +502,7 @@ ENTRY(system_call)
 	jae syscall_badsys
 syscall_call:
 	call *sys_call_table(,%eax,4)
+syscall_after_call:
 	movl %eax,PT_EAX(%esp)		# store the return value
 syscall_exit:
 	LOCKDEP_SYS_EXIT
@@ -675,12 +676,12 @@ syscall_fault:
 END(syscall_fault)

 syscall_badsys:
-	movl $-ENOSYS,PT_EAX(%esp)
-	jmp syscall_exit
+	movl $-ENOSYS,%eax
+	jmp syscall_after_call
 END(syscall_badsys)

 sysenter_badsys:
-	movl $-ENOSYS,PT_EAX(%esp)
+	movl $-ENOSYS,%eax
 	jmp sysenter_after_call
 END(syscall_badsys)
 	CFI_ENDPROC
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 6afbb16e9b79..94d857fb1033 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -175,7 +175,7 @@ void init_espfix_ap(void)
 	if (!pud_present(pud)) {
 		pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP);
 		pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask));
-		paravirt_alloc_pud(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
+		paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PUD_CLONES; n++)
 			set_pud(&pud_p[n], pud);
 	}
@@ -185,7 +185,7 @@ void init_espfix_ap(void)
 	if (!pmd_present(pmd)) {
 		pte_p = (pte_t *)__get_free_page(PGALLOC_GFP);
 		pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask));
-		paravirt_alloc_pmd(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
+		paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT);
 		for (n = 0; n < ESPFIX_PMD_CLONES; n++)
 			set_pmd(&pmd_p[n], pmd);
 	}
@@ -193,7 +193,6 @@ void init_espfix_ap(void)
 	pte_p = pte_offset_kernel(&pmd, addr);
 	stack_page = (void *)__get_free_page(GFP_KERNEL);
 	pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask));
-	paravirt_alloc_pte(&init_mm, __pa(stack_page) >> PAGE_SHIFT);
 	for (n = 0; n < ESPFIX_PTE_CLONES; n++)
 		set_pte(&pte_p[n*PTE_STRIDE], pte);

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 7596df664901..67e6d19ef1be 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -574,6 +574,9 @@ int kprobe_int3_handler(struct pt_regs *regs)
 	struct kprobe *p;
 	struct kprobe_ctlblk *kcb;

+	if (user_mode_vm(regs))
+		return 0;
+
 	addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
 	/*
 	 * We don't want to be preempted for the entire
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 57e5ce126d5a..ea030319b321 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -920,9 +920,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
 		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
 			mark_tsc_unstable("cpufreq changes");
-	}

-	set_cyc2ns_scale(tsc_khz, freq->cpu);
+		set_cyc2ns_scale(tsc_khz, freq->cpu);
+	}

 	return 0;
 }