Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/cpu/hypervisor.c                     |   4
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c                     |   7
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c                     |  83
-rw-r--r--  arch/x86/kernel/cpu/perf_event.h                     |  24
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c               | 118
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_ds.c            |   4
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_pt.c            |  36
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_rapl.c          |   1
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.c        |  18
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore.h        |  19
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c    |  12
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c  |   6
-rw-r--r--  arch/x86/kernel/head64.c                             |   2
-rw-r--r--  arch/x86/kernel/head_32.S                            |  33
-rw-r--r--  arch/x86/kernel/head_64.S                            |  20
-rw-r--r--  arch/x86/kernel/i387.c                               |  15
-rw-r--r--  arch/x86/kernel/process.c                            |  14
-rw-r--r--  arch/x86/kernel/pvclock.c                            |  44
18 files changed, 255 insertions, 205 deletions
| diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 36ce402a3fa5..d820d8eae96b 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -27,8 +27,8 @@  static const __initconst struct hypervisor_x86 * const hypervisors[] =  { -#ifdef CONFIG_XEN_PVHVM -	&x86_hyper_xen_hvm, +#ifdef CONFIG_XEN +	&x86_hyper_xen,  #endif  	&x86_hyper_vmware,  	&x86_hyper_ms_hyperv, diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index e535533d5ab8..20190bdac9d5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -708,6 +708,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,  			  struct pt_regs *regs)  {  	int i, ret = 0; +	char *tmp;  	for (i = 0; i < mca_cfg.banks; i++) {  		m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); @@ -716,9 +717,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,  			if (quirk_no_way_out)  				quirk_no_way_out(i, m, regs);  		} -		if (mce_severity(m, mca_cfg.tolerant, msg, true) >= -		    MCE_PANIC_SEVERITY) + +		if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { +			*msg = tmp;  			ret = 1; +		}  	}  	return ret;  } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 87848ebe2bb7..4f7001f28936 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -190,6 +190,7 @@ static bool check_hw_exists(void)  	u64 val, val_fail, val_new= ~0;  	int i, reg, reg_fail, ret = 0;  	int bios_fail = 0; +	int reg_safe = -1;  	/*  	 * Check to see if the BIOS enabled any of the counters, if so @@ -204,6 +205,8 @@ static bool check_hw_exists(void)  			bios_fail = 1;  			val_fail = val;  			reg_fail = reg; +		} else { +			reg_safe = i;  		}  	} @@ -222,11 +225,22 @@ static bool check_hw_exists(void)  	}  	/* +	 * If all the counters are enabled, the below test will always +	 * fail.  The tools will also become useless in this scenario. +	 * Just fail and disable the hardware counters. +	 */ + +	if (reg_safe == -1) { +		reg = reg_safe; +		goto msr_fail; +	} + +	/*  	 * Read the current value, change it and read it back to see if it  	 * matches, this is needed to detect certain hardware emulators  	 * (qemu/kvm) that don't trap on the MSR access and always return 0s.  	 */ -	reg = x86_pmu_event_addr(0); +	reg = x86_pmu_event_addr(reg_safe);  	if (rdmsrl_safe(reg, &val))  		goto msr_fail;  	val ^= 0xffffUL; @@ -611,6 +625,7 @@ struct sched_state {  	int	event;		/* event index */  	int	counter;	/* counter index */  	int	unassigned;	/* number of events to be assigned left */ +	int	nr_gp;		/* number of GP counters used */  	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];  }; @@ -620,27 +635,29 @@ struct sched_state {  struct perf_sched {  	int			max_weight;  	int			max_events; -	struct perf_event	**events; -	struct sched_state	state; +	int			max_gp;  	int			saved_states; +	struct event_constraint	**constraints; +	struct sched_state	state;  	struct sched_state	saved[SCHED_STATES_MAX];  };  /*   * Initialize interator that runs through all events and counters.   
*/ -static void perf_sched_init(struct perf_sched *sched, struct perf_event **events, -			    int num, int wmin, int wmax) +static void perf_sched_init(struct perf_sched *sched, struct event_constraint **constraints, +			    int num, int wmin, int wmax, int gpmax)  {  	int idx;  	memset(sched, 0, sizeof(*sched));  	sched->max_events	= num;  	sched->max_weight	= wmax; -	sched->events		= events; +	sched->max_gp		= gpmax; +	sched->constraints	= constraints;  	for (idx = 0; idx < num; idx++) { -		if (events[idx]->hw.constraint->weight == wmin) +		if (constraints[idx]->weight == wmin)  			break;  	} @@ -687,7 +704,7 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)  	if (sched->state.event >= sched->max_events)  		return false; -	c = sched->events[sched->state.event]->hw.constraint; +	c = sched->constraints[sched->state.event];  	/* Prefer fixed purpose counters */  	if (c->idxmsk64 & (~0ULL << INTEL_PMC_IDX_FIXED)) {  		idx = INTEL_PMC_IDX_FIXED; @@ -696,11 +713,16 @@ static bool __perf_sched_find_counter(struct perf_sched *sched)  				goto done;  		}  	} +  	/* Grab the first unused counter starting with idx */  	idx = sched->state.counter;  	for_each_set_bit_from(idx, c->idxmsk, INTEL_PMC_IDX_FIXED) { -		if (!__test_and_set_bit(idx, sched->state.used)) +		if (!__test_and_set_bit(idx, sched->state.used)) { +			if (sched->state.nr_gp++ >= sched->max_gp) +				return false; +  			goto done; +		}  	}  	return false; @@ -745,7 +767,7 @@ static bool perf_sched_next_event(struct perf_sched *sched)  			if (sched->state.weight > sched->max_weight)  				return false;  		} -		c = sched->events[sched->state.event]->hw.constraint; +		c = sched->constraints[sched->state.event];  	} while (c->weight != sched->state.weight);  	sched->state.counter = 0;	/* start with first counter */ @@ -756,12 +778,12 @@ static bool perf_sched_next_event(struct perf_sched *sched)  /*   * Assign a counter for each event.   */ -int perf_assign_events(struct perf_event **events, int n, -			int wmin, int wmax, int *assign) +int perf_assign_events(struct event_constraint **constraints, int n, +			int wmin, int wmax, int gpmax, int *assign)  {  	struct perf_sched sched; -	perf_sched_init(&sched, events, n, wmin, wmax); +	perf_sched_init(&sched, constraints, n, wmin, wmax, gpmax);  	do {  		if (!perf_sched_find_counter(&sched)) @@ -788,9 +810,9 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)  		x86_pmu.start_scheduling(cpuc);  	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) { -		hwc = &cpuc->event_list[i]->hw; +		cpuc->event_constraint[i] = NULL;  		c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]); -		hwc->constraint = c; +		cpuc->event_constraint[i] = c;  		wmin = min(wmin, c->weight);  		wmax = max(wmax, c->weight); @@ -801,7 +823,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)  	 */  	for (i = 0; i < n; i++) {  		hwc = &cpuc->event_list[i]->hw; -		c = hwc->constraint; +		c = cpuc->event_constraint[i];  		/* never assigned */  		if (hwc->idx == -1) @@ -821,9 +843,26 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)  	}  	/* slow path */ -	if (i != n) -		unsched = perf_assign_events(cpuc->event_list, n, wmin, -					     wmax, assign); +	if (i != n) { +		int gpmax = x86_pmu.num_counters; + +		/* +		 * Do not allow scheduling of more than half the available +		 * generic counters. 
+		 * +		 * This helps avoid counter starvation of sibling thread by +		 * ensuring at most half the counters cannot be in exclusive +		 * mode. There is no designated counters for the limits. Any +		 * N/2 counters can be used. This helps with events with +		 * specific counter constraints. +		 */ +		if (is_ht_workaround_enabled() && !cpuc->is_fake && +		    READ_ONCE(cpuc->excl_cntrs->exclusive_present)) +			gpmax /= 2; + +		unsched = perf_assign_events(cpuc->event_constraint, n, wmin, +					     wmax, gpmax, assign); +	}  	/*  	 * In case of success (unsched = 0), mark events as committed, @@ -840,7 +879,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)  			e = cpuc->event_list[i];  			e->hw.flags |= PERF_X86_EVENT_COMMITTED;  			if (x86_pmu.commit_scheduling) -				x86_pmu.commit_scheduling(cpuc, e, assign[i]); +				x86_pmu.commit_scheduling(cpuc, i, assign[i]);  		}  	} @@ -1292,8 +1331,10 @@ static void x86_pmu_del(struct perf_event *event, int flags)  		x86_pmu.put_event_constraints(cpuc, event);  	/* Delete the array entry. */ -	while (++i < cpuc->n_events) +	while (++i < cpuc->n_events) {  		cpuc->event_list[i-1] = cpuc->event_list[i]; +		cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; +	}  	--cpuc->n_events;  	perf_event_update_userpage(event); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6ac5cb7a9e14..ef78516850fb 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -74,6 +74,7 @@ struct event_constraint {  #define PERF_X86_EVENT_EXCL		0x0040 /* HT exclusivity on counter */  #define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */  #define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */ +#define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */  struct amd_nb { @@ -134,8 +135,6 @@ enum intel_excl_state_type {  struct intel_excl_states {  	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];  	enum intel_excl_state_type state[X86_PMC_IDX_MAX]; -	int  num_alloc_cntrs;/* #counters allocated */ -	int  max_alloc_cntrs;/* max #counters allowed */  	bool sched_started; /* true if scheduling has started */  }; @@ -144,6 +143,11 @@ struct intel_excl_cntrs {  	struct intel_excl_states states[2]; +	union { +		u16	has_exclusive[2]; +		u32	exclusive_present; +	}; +  	int		refcnt;		/* per-core: #HT threads */  	unsigned	core_id;	/* per-core: core id */  }; @@ -172,7 +176,11 @@ struct cpu_hw_events {  					     added in the current transaction */  	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */  	u64			tags[X86_PMC_IDX_MAX]; +  	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */ +	struct event_constraint	*event_constraint[X86_PMC_IDX_MAX]; + +	int			n_excl; /* the number of exclusive events */  	unsigned int		group_flag;  	int			is_fake; @@ -519,9 +527,7 @@ struct x86_pmu {  	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,  						 struct perf_event *event); -	void		(*commit_scheduling)(struct cpu_hw_events *cpuc, -					     struct perf_event *event, -					     int cntr); +	void		(*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);  	void		(*start_scheduling)(struct cpu_hw_events *cpuc); @@ -717,8 +723,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,  void x86_pmu_enable_all(int added); -int perf_assign_events(struct perf_event **events, int n, -			int wmin, int wmax, int *assign); +int perf_assign_events(struct event_constraint **constraints, int n, +		
	int wmin, int wmax, int gpmax, int *assign);  int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);  void x86_pmu_stop(struct perf_event *event, int flags); @@ -929,4 +935,8 @@ static inline struct intel_shared_regs *allocate_shared_regs(int cpu)  	return NULL;  } +static inline int is_ht_workaround_enabled(void) +{ +	return 0; +}  #endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 219d3fb423a1..a1e35c9f06b9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs   [ C(LL  ) ] = {  	[ C(OP_READ) ] = {  		[ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS, -		[ C(RESULT_MISS)   ] = SLM_DMND_READ|SLM_LLC_MISS, +		[ C(RESULT_MISS)   ] = 0,  	},  	[ C(OP_WRITE) ] = {  		[ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS, @@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids  	[ C(OP_READ) ] = {  		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */  		[ C(RESULT_ACCESS) ] = 0x01b7, -		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ -		[ C(RESULT_MISS)   ] = 0x01b7, +		[ C(RESULT_MISS)   ] = 0,  	},  	[ C(OP_WRITE) ] = {  		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ @@ -1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids   [ C(ITLB) ] = {  	[ C(OP_READ) ] = {  		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ -		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES */ +		[ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */  	},  	[ C(OP_WRITE) ] = {  		[ C(RESULT_ACCESS) ] = -1, @@ -1924,7 +1923,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)  	xl = &excl_cntrs->states[tid];  	xl->sched_started = true; -	xl->num_alloc_cntrs = 0;  	/*  	 * lock shared state until we are done scheduling  	 * in stop_event_scheduling() @@ -2001,6 +1999,11 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,  	 * across HT threads  	 */  	is_excl = c->flags & PERF_X86_EVENT_EXCL; +	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) { +		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT; +		if (!cpuc->n_excl++) +			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1); +	}  	/*  	 * xl = state of current HT @@ -2009,18 +2012,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,  	xl = &excl_cntrs->states[tid];  	xlo = &excl_cntrs->states[o_tid]; -	/* -	 * do not allow scheduling of more than max_alloc_cntrs -	 * which is set to half the available generic counters. -	 * this helps avoid counter starvation of sibling thread -	 * by ensuring at most half the counters cannot be in -	 * exclusive mode. There is not designated counters for the -	 * limits. Any N/2 counters can be used. 
This helps with -	 * events with specifix counter constraints -	 */ -	if (xl->num_alloc_cntrs++ == xl->max_alloc_cntrs) -		return &emptyconstraint; -  	cx = c;  	/* @@ -2107,7 +2098,7 @@ static struct event_constraint *  intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,  			    struct perf_event *event)  { -	struct event_constraint *c1 = event->hw.constraint; +	struct event_constraint *c1 = cpuc->event_constraint[idx];  	struct event_constraint *c2;  	/* @@ -2151,6 +2142,11 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,  	xl = &excl_cntrs->states[tid];  	xlo = &excl_cntrs->states[o_tid]; +	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) { +		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT; +		if (!--cpuc->n_excl) +			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0); +	}  	/*  	 * put_constraint may be called from x86_schedule_events() @@ -2189,8 +2185,6 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,  static void intel_put_event_constraints(struct cpu_hw_events *cpuc,  					struct perf_event *event)  { -	struct event_constraint *c = event->hw.constraint; -  	intel_put_shared_regs_event_constraints(cpuc, event);  	/* @@ -2198,19 +2192,14 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,  	 * all events are subject to and must call the  	 * put_excl_constraints() routine  	 */ -	if (c && cpuc->excl_cntrs) +	if (cpuc->excl_cntrs)  		intel_put_excl_constraints(cpuc, event); - -	/* cleanup dynamic constraint */ -	if (c && (c->flags & PERF_X86_EVENT_DYNAMIC)) -		event->hw.constraint = NULL;  } -static void intel_commit_scheduling(struct cpu_hw_events *cpuc, -				    struct perf_event *event, int cntr) +static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)  {  	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs; -	struct event_constraint *c = event->hw.constraint; +	struct event_constraint *c = cpuc->event_constraint[idx];  	struct intel_excl_states *xlo, *xl;  	int tid = cpuc->excl_thread_id;  	int o_tid = 1 - tid; @@ -2533,34 +2522,6 @@ ssize_t intel_event_sysfs_show(char *page, u64 config)  	return x86_event_sysfs_show(page, config, event);  } -static __initconst const struct x86_pmu core_pmu = { -	.name			= "core", -	.handle_irq		= x86_pmu_handle_irq, -	.disable_all		= x86_pmu_disable_all, -	.enable_all		= core_pmu_enable_all, -	.enable			= core_pmu_enable_event, -	.disable		= x86_pmu_disable_event, -	.hw_config		= x86_pmu_hw_config, -	.schedule_events	= x86_schedule_events, -	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0, -	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0, -	.event_map		= intel_pmu_event_map, -	.max_events		= ARRAY_SIZE(intel_perfmon_event_map), -	.apic			= 1, -	/* -	 * Intel PMCs cannot be accessed sanely above 32 bit width, -	 * so we install an artificial 1<<31 period regardless of -	 * the generic event period: -	 */ -	.max_period		= (1ULL << 31) - 1, -	.get_event_constraints	= intel_get_event_constraints, -	.put_event_constraints	= intel_put_event_constraints, -	.event_constraints	= intel_core_event_constraints, -	.guest_get_msrs		= core_guest_get_msrs, -	.format_attrs		= intel_arch_formats_attr, -	.events_sysfs_show	= intel_event_sysfs_show, -}; -  struct intel_shared_regs *allocate_shared_regs(int cpu)  {  	struct intel_shared_regs *regs; @@ -2668,8 +2629,6 @@ static void intel_pmu_cpu_starting(int cpu)  		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];  	if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) { -		int h = x86_pmu.num_counters >> 1; -  		for_each_cpu(i, 
topology_thread_cpumask(cpu)) {  			struct intel_excl_cntrs *c; @@ -2683,11 +2642,6 @@ static void intel_pmu_cpu_starting(int cpu)  		}  		cpuc->excl_cntrs->core_id = core_id;  		cpuc->excl_cntrs->refcnt++; -		/* -		 * set hard limit to half the number of generic counters -		 */ -		cpuc->excl_cntrs->states[0].max_alloc_cntrs = h; -		cpuc->excl_cntrs->states[1].max_alloc_cntrs = h;  	}  } @@ -2743,6 +2697,44 @@ static struct attribute *intel_arch3_formats_attr[] = {  	NULL,  }; +static __initconst const struct x86_pmu core_pmu = { +	.name			= "core", +	.handle_irq		= x86_pmu_handle_irq, +	.disable_all		= x86_pmu_disable_all, +	.enable_all		= core_pmu_enable_all, +	.enable			= core_pmu_enable_event, +	.disable		= x86_pmu_disable_event, +	.hw_config		= x86_pmu_hw_config, +	.schedule_events	= x86_schedule_events, +	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0, +	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0, +	.event_map		= intel_pmu_event_map, +	.max_events		= ARRAY_SIZE(intel_perfmon_event_map), +	.apic			= 1, +	/* +	 * Intel PMCs cannot be accessed sanely above 32-bit width, +	 * so we install an artificial 1<<31 period regardless of +	 * the generic event period: +	 */ +	.max_period		= (1ULL<<31) - 1, +	.get_event_constraints	= intel_get_event_constraints, +	.put_event_constraints	= intel_put_event_constraints, +	.event_constraints	= intel_core_event_constraints, +	.guest_get_msrs		= core_guest_get_msrs, +	.format_attrs		= intel_arch_formats_attr, +	.events_sysfs_show	= intel_event_sysfs_show, + +	/* +	 * Virtual (or funny metal) CPU can define x86_pmu.extra_regs +	 * together with PMU version 1 and thus be using core_pmu with +	 * shared_regs. We need following callbacks here to allocate +	 * it properly. +	 */ +	.cpu_prepare		= intel_pmu_cpu_prepare, +	.cpu_starting		= intel_pmu_cpu_starting, +	.cpu_dying		= intel_pmu_cpu_dying, +}; +  static __initconst const struct x86_pmu intel_pmu = {  	.name			= "Intel",  	.handle_irq		= intel_pmu_handle_irq, diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 813f75d71175..7f73b3553e2e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -706,9 +706,9 @@ void intel_pmu_pebs_disable(struct perf_event *event)  	cpuc->pebs_enabled &= ~(1ULL << hwc->idx); -	if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT) +	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)  		cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); -	else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST) +	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)  		cpuc->pebs_enabled &= ~(1ULL << 63);  	if (cpuc->enabled) diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c index ffe666c2c6b5..123ff1bb2f60 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_pt.c +++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c @@ -151,7 +151,7 @@ static int __init pt_pmu_hw_init(void)  		de_attr->attr.attr.name = pt_caps[i].name; -		sysfs_attr_init(&de_attrs->attr.attr); +		sysfs_attr_init(&de_attr->attr.attr);  		de_attr->attr.attr.mode		= S_IRUGO;  		de_attr->attr.show		= pt_cap_show; @@ -615,7 +615,8 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,  				   struct perf_output_handle *handle)  { -	unsigned long idx, npages, end; +	unsigned long head = local64_read(&buf->head); +	unsigned long idx, npages, wakeup;  	if (buf->snapshot)  		return 0; @@ -634,17 +635,26 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,  	
buf->topa_index[buf->stop_pos]->stop = 0;  	buf->topa_index[buf->intr_pos]->intr = 0; -	if (pt_cap_get(PT_CAP_topa_multiple_entries)) { -		npages = (handle->size + 1) >> PAGE_SHIFT; -		end = (local64_read(&buf->head) >> PAGE_SHIFT) + npages; -		/*if (end > handle->wakeup >> PAGE_SHIFT) -		  end = handle->wakeup >> PAGE_SHIFT;*/ -		idx = end & (buf->nr_pages - 1); -		buf->stop_pos = idx; -		idx = (local64_read(&buf->head) >> PAGE_SHIFT) + npages - 1; -		idx &= buf->nr_pages - 1; -		buf->intr_pos = idx; -	} +	/* how many pages till the STOP marker */ +	npages = handle->size >> PAGE_SHIFT; + +	/* if it's on a page boundary, fill up one more page */ +	if (!offset_in_page(head + handle->size + 1)) +		npages++; + +	idx = (head >> PAGE_SHIFT) + npages; +	idx &= buf->nr_pages - 1; +	buf->stop_pos = idx; + +	wakeup = handle->wakeup >> PAGE_SHIFT; + +	/* in the worst case, wake up the consumer one page before hard stop */ +	idx = (head >> PAGE_SHIFT) + npages - 1; +	if (idx > wakeup) +		idx = wakeup; + +	idx &= buf->nr_pages - 1; +	buf->intr_pos = idx;  	buf->topa_index[buf->stop_pos]->stop = 1;  	buf->topa_index[buf->intr_pos]->intr = 1; diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 999289b94025..358c54ad20d4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -722,6 +722,7 @@ static int __init rapl_pmu_init(void)  		break;  	case 60: /* Haswell */  	case 69: /* Haswell-Celeron */ +	case 61: /* Broadwell */  		rapl_cntr_mask = RAPL_IDX_HSW;  		rapl_pmu_events_group.attrs = rapl_events_hsw_attr;  		break; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index c635b8b49e93..90b7c501c95b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c @@ -365,9 +365,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int  	bitmap_zero(used_mask, UNCORE_PMC_IDX_MAX);  	for (i = 0, wmin = UNCORE_PMC_IDX_MAX, wmax = 0; i < n; i++) { -		hwc = &box->event_list[i]->hw;  		c = uncore_get_event_constraint(box, box->event_list[i]); -		hwc->constraint = c; +		box->event_constraint[i] = c;  		wmin = min(wmin, c->weight);  		wmax = max(wmax, c->weight);  	} @@ -375,7 +374,7 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int  	/* fastpath, try to reuse previous register */  	for (i = 0; i < n; i++) {  		hwc = &box->event_list[i]->hw; -		c = hwc->constraint; +		c = box->event_constraint[i];  		/* never assigned */  		if (hwc->idx == -1) @@ -395,8 +394,8 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int  	}  	/* slow path */  	if (i != n) -		ret = perf_assign_events(box->event_list, n, -					 wmin, wmax, assign); +		ret = perf_assign_events(box->event_constraint, n, +					 wmin, wmax, n, assign);  	if (!assign || ret) {  		for (i = 0; i < n; i++) @@ -840,6 +839,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id  	box->phys_id = phys_id;  	box->pci_dev = pdev;  	box->pmu = pmu; +	uncore_box_init(box);  	pci_set_drvdata(pdev, box);  	raw_spin_lock(&uncore_box_lock); @@ -1003,8 +1003,10 @@ static int uncore_cpu_starting(int cpu)  			pmu = &type->pmus[j];  			box = *per_cpu_ptr(pmu->box, cpu);  			/* called by uncore_cpu_init? 
*/ -			if (box && box->phys_id >= 0) +			if (box && box->phys_id >= 0) { +				uncore_box_init(box);  				continue; +			}  			for_each_online_cpu(k) {  				exist = *per_cpu_ptr(pmu->box, k); @@ -1020,8 +1022,10 @@ static int uncore_cpu_starting(int cpu)  				}  			} -			if (box) +			if (box) {  				box->phys_id = phys_id; +				uncore_box_init(box); +			}  		}  	}  	return 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index 6c8c1e7e69d8..ceac8f5dc018 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h @@ -97,6 +97,7 @@ struct intel_uncore_box {  	atomic_t refcnt;  	struct perf_event *events[UNCORE_PMC_IDX_MAX];  	struct perf_event *event_list[UNCORE_PMC_IDX_MAX]; +	struct event_constraint *event_constraint[UNCORE_PMC_IDX_MAX];  	unsigned long active_mask[BITS_TO_LONGS(UNCORE_PMC_IDX_MAX)];  	u64 tags[UNCORE_PMC_IDX_MAX];  	struct pci_dev *pci_dev; @@ -257,14 +258,6 @@ static inline int uncore_num_counters(struct intel_uncore_box *box)  	return box->pmu->type->num_counters;  } -static inline void uncore_box_init(struct intel_uncore_box *box) -{ -	if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { -		if (box->pmu->type->ops->init_box) -			box->pmu->type->ops->init_box(box); -	} -} -  static inline void uncore_disable_box(struct intel_uncore_box *box)  {  	if (box->pmu->type->ops->disable_box) @@ -273,8 +266,6 @@ static inline void uncore_disable_box(struct intel_uncore_box *box)  static inline void uncore_enable_box(struct intel_uncore_box *box)  { -	uncore_box_init(box); -  	if (box->pmu->type->ops->enable_box)  		box->pmu->type->ops->enable_box(box);  } @@ -297,6 +288,14 @@ static inline u64 uncore_read_counter(struct intel_uncore_box *box,  	return box->pmu->type->ops->read_counter(box, event);  } +static inline void uncore_box_init(struct intel_uncore_box *box) +{ +	if (!test_and_set_bit(UNCORE_BOX_FLAG_INITIATED, &box->flags)) { +		if (box->pmu->type->ops->init_box) +			box->pmu->type->ops->init_box(box); +	} +} +  static inline bool uncore_box_is_fake(struct intel_uncore_box *box)  {  	return (box->phys_id < 0); diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c index 3001015b755c..4562e9e22c60 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c @@ -1,6 +1,13 @@  /* Nehalem/SandBridge/Haswell uncore support */  #include "perf_event_intel_uncore.h" +/* Uncore IMC PCI IDs */ +#define PCI_DEVICE_ID_INTEL_SNB_IMC	0x0100 +#define PCI_DEVICE_ID_INTEL_IVB_IMC	0x0154 +#define PCI_DEVICE_ID_INTEL_IVB_E3_IMC	0x0150 +#define PCI_DEVICE_ID_INTEL_HSW_IMC	0x0c00 +#define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04 +  /* SNB event control */  #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff  #define SNB_UNC_CTL_UMASK_MASK			0x0000ff00 @@ -472,6 +479,10 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {  		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC),  		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),  	}, +	{ /* IMC */ +		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_U_IMC), +		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), +	},  	{ /* end: all zeroes */ },  }; @@ -502,6 +513,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {  	IMC_DEV(IVB_IMC, &ivb_uncore_pci_driver),    /* 3rd Gen Core processor */  	IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen 
Core processor */  	IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */ +	IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */  	{  /* end marker */ }  }; diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c index 12d9548457e7..6d6e85dd5849 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snbep.c @@ -164,8 +164,8 @@  				((1ULL << (n)) - 1)))  /* Haswell-EP Ubox */ -#define HSWEP_U_MSR_PMON_CTR0			0x705 -#define HSWEP_U_MSR_PMON_CTL0			0x709 +#define HSWEP_U_MSR_PMON_CTR0			0x709 +#define HSWEP_U_MSR_PMON_CTL0			0x705  #define HSWEP_U_MSR_PMON_FILTER			0x707  #define HSWEP_U_MSR_PMON_UCLK_FIXED_CTL		0x703 @@ -1914,7 +1914,7 @@ static struct intel_uncore_type hswep_uncore_cbox = {  	.name			= "cbox",  	.num_counters		= 4,  	.num_boxes		= 18, -	.perf_ctr_bits		= 44, +	.perf_ctr_bits		= 48,  	.event_ctl		= HSWEP_C0_MSR_PMON_CTL0,  	.perf_ctr		= HSWEP_C0_MSR_PMON_CTR0,  	.event_mask		= SNBEP_CBO_MSR_PMON_RAW_EVENT_MASK, diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 2b55ee6db053..5a4668136e98 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -167,7 +167,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)  	clear_bss();  	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) -		set_intr_gate(i, early_idt_handlers[i]); +		set_intr_gate(i, early_idt_handler_array[i]);  	load_idt((const struct desc_ptr *)&idt_descr);  	copy_bootdata(__va(real_mode_data)); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index d031bad9e07e..53eeb226657c 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -478,21 +478,22 @@ is486:  __INIT  setup_once:  	/* -	 * Set up a idt with 256 entries pointing to ignore_int, -	 * interrupt gates. It doesn't actually load idt - that needs -	 * to be done on each CPU. Interrupts are enabled elsewhere, -	 * when we can be relatively sure everything is ok. +	 * Set up a idt with 256 interrupt gates that push zero if there +	 * is no error code and then jump to early_idt_handler_common. +	 * It doesn't actually load the idt - that needs to be done on +	 * each CPU. Interrupts are enabled elsewhere, when we can be +	 * relatively sure everything is ok.  	 
*/  	movl $idt_table,%edi -	movl $early_idt_handlers,%eax +	movl $early_idt_handler_array,%eax  	movl $NUM_EXCEPTION_VECTORS,%ecx  1:  	movl %eax,(%edi)  	movl %eax,4(%edi)  	/* interrupt gate, dpl=0, present */  	movl $(0x8E000000 + __KERNEL_CS),2(%edi) -	addl $9,%eax +	addl $EARLY_IDT_HANDLER_SIZE,%eax  	addl $8,%edi  	loop 1b @@ -524,26 +525,28 @@ setup_once:  	andl $0,setup_once_ref	/* Once is enough, thanks */  	ret -ENTRY(early_idt_handlers) +ENTRY(early_idt_handler_array)  	# 36(%esp) %eflags  	# 32(%esp) %cs  	# 28(%esp) %eip  	# 24(%rsp) error code  	i = 0  	.rept NUM_EXCEPTION_VECTORS -	.if (EXCEPTION_ERRCODE_MASK >> i) & 1 -	ASM_NOP2 -	.else +	.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1  	pushl $0		# Dummy error code, to make stack frame uniform  	.endif  	pushl $i		# 20(%esp) Vector number -	jmp early_idt_handler +	jmp early_idt_handler_common  	i = i + 1 +	.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc  	.endr -ENDPROC(early_idt_handlers) +ENDPROC(early_idt_handler_array) -	/* This is global to keep gas from relaxing the jumps */ -ENTRY(early_idt_handler) +early_idt_handler_common: +	/* +	 * The stack is the hardware frame, an error code or zero, and the +	 * vector number. +	 */  	cld  	cmpl $2,(%esp)		# X86_TRAP_NMI @@ -603,7 +606,7 @@ ex_entry:  is_nmi:  	addl $8,%esp		/* drop vector number and error code */  	iret -ENDPROC(early_idt_handler) +ENDPROC(early_idt_handler_common)  /* This is the default interrupt "handler" :-) */  	ALIGN diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index ae6588b301c2..df7e78057ae0 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -321,26 +321,28 @@ bad_address:  	jmp bad_address  	__INIT -	.globl early_idt_handlers -early_idt_handlers: +ENTRY(early_idt_handler_array)  	# 104(%rsp) %rflags  	#  96(%rsp) %cs  	#  88(%rsp) %rip  	#  80(%rsp) error code  	i = 0  	.rept NUM_EXCEPTION_VECTORS -	.if (EXCEPTION_ERRCODE_MASK >> i) & 1 -	ASM_NOP2 -	.else +	.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1  	pushq $0		# Dummy error code, to make stack frame uniform  	.endif  	pushq $i		# 72(%rsp) Vector number -	jmp early_idt_handler +	jmp early_idt_handler_common  	i = i + 1 +	.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc  	.endr +ENDPROC(early_idt_handler_array) -/* This is global to keep gas from relaxing the jumps */ -ENTRY(early_idt_handler) +early_idt_handler_common: +	/* +	 * The stack is the hardware frame, an error code or zero, and the +	 * vector number. +	 */  	cld  	cmpl $2,(%rsp)		# X86_TRAP_NMI @@ -412,7 +414,7 @@ ENTRY(early_idt_handler)  is_nmi:  	addq $16,%rsp		# drop vector number and error code  	INTERRUPT_RETURN -ENDPROC(early_idt_handler) +ENDPROC(early_idt_handler_common)  	__INITDATA diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 009183276bb7..6185d3141219 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -173,6 +173,21 @@ static void init_thread_xstate(void)  		xstate_size = sizeof(struct i387_fxsave_struct);  	else  		xstate_size = sizeof(struct i387_fsave_struct); + +	/* +	 * Quirk: we don't yet handle the XSAVES* instructions +	 * correctly, as we don't correctly convert between +	 * standard and compacted format when interfacing +	 * with user-space - so disable it for now. +	 * +	 * The difference is small: with recent CPUs the +	 * compacted format is only marginally smaller than +	 * the standard FPU state format. +	 * +	 * ( This is easy to backport while we are fixing +	 *   XSAVES* support. 
) +	 */ +	setup_clear_cpu_cap(X86_FEATURE_XSAVES);  }  /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8213da62b1b7..6e338e3b1dc0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -57,7 +57,7 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {  	.io_bitmap		= { [0 ... IO_BITMAP_LONGS] = ~0 },  #endif  }; -EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss); +EXPORT_PER_CPU_SYMBOL(cpu_tss);  #ifdef CONFIG_X86_64  static DEFINE_PER_CPU(unsigned char, is_idle); @@ -156,11 +156,13 @@ void flush_thread(void)  		/* FPU state will be reallocated lazily at the first use. */  		drop_fpu(tsk);  		free_thread_xstate(tsk); -	} else if (!used_math()) { -		/* kthread execs. TODO: cleanup this horror. */ -		if (WARN_ON(init_fpu(tsk))) -			force_sig(SIGKILL, tsk); -		user_fpu_begin(); +	} else { +		if (!tsk_used_math(tsk)) { +			/* kthread execs. TODO: cleanup this horror. */ +			if (WARN_ON(init_fpu(tsk))) +				force_sig(SIGKILL, tsk); +			user_fpu_begin(); +		}  		restore_init_xstate();  	}  } diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index e5ecd20e72dd..2f355d229a58 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -141,46 +141,7 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,  	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);  } -static struct pvclock_vsyscall_time_info *pvclock_vdso_info; - -static struct pvclock_vsyscall_time_info * -pvclock_get_vsyscall_user_time_info(int cpu) -{ -	if (!pvclock_vdso_info) { -		BUG(); -		return NULL; -	} - -	return &pvclock_vdso_info[cpu]; -} - -struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) -{ -	return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; -} -  #ifdef CONFIG_X86_64 -static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, -			        void *v) -{ -	struct task_migration_notifier *mn = v; -	struct pvclock_vsyscall_time_info *pvti; - -	pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); - -	/* this is NULL when pvclock vsyscall is not initialized */ -	if (unlikely(pvti == NULL)) -		return NOTIFY_DONE; - -	pvti->migrate_count++; - -	return NOTIFY_DONE; -} - -static struct notifier_block pvclock_migrate = { -	.notifier_call = pvclock_task_migrate, -}; -  /*   * Initialize the generic pvclock vsyscall state.  This will allocate   * a/some page(s) for the per-vcpu pvclock information, set up a @@ -194,17 +155,12 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,  	WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); -	pvclock_vdso_info = i; -  	for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {  		__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,  			     __pa(i) + (idx*PAGE_SIZE),  			     PAGE_KERNEL_VVAR);  	} - -	register_task_migration_notifier(&pvclock_migrate); -  	return 0;  }  #endif |
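
The perf_event.c change above replaces the per-HT-thread max_alloc_cntrs limit with a gpmax argument to perf_assign_events(), capping how many general-purpose counters one thread may take when exclusive events are present. The following is a minimal, self-contained sketch of that idea only; it is not the kernel's implementation (the real perf_assign_events() runs a backtracking search over saved scheduler states and treats fixed-purpose counters separately), and the names here (assign_events, struct constraint, NUM_COUNTERS) are simplified stand-ins invented for illustration.

#include <stdint.h>
#include <stdio.h>

#define NUM_COUNTERS 8

struct constraint {
	uint64_t idxmsk;	/* bitmask of counters the event may use */
	int	 weight;	/* number of bits set in idxmsk */
};

/*
 * Hand out counters in order of increasing constraint weight, never
 * using more than gpmax general-purpose counters.  Returns the number
 * of events left unscheduled.
 */
static int assign_events(const struct constraint *c, int n, int gpmax,
			 int *assign)
{
	uint64_t used = 0;
	int nr_gp = 0, unsched = 0;

	for (int w = 1; w <= NUM_COUNTERS; w++) {
		for (int i = 0; i < n; i++) {
			if (c[i].weight != w)
				continue;
			assign[i] = -1;
			for (int idx = 0; idx < NUM_COUNTERS; idx++) {
				if (!(c[i].idxmsk & (1ULL << idx)) ||
				    (used & (1ULL << idx)))
					continue;
				if (nr_gp >= gpmax)
					break;	/* GP budget exhausted */
				used |= 1ULL << idx;
				nr_gp++;
				assign[i] = idx;
				break;
			}
			if (assign[i] == -1)
				unsched++;
		}
	}
	return unsched;
}

int main(void)
{
	struct constraint c[] = {
		{ 0x0f, 4 },	/* may use counters 0-3 */
		{ 0x01, 1 },	/* pinned to counter 0 */
		{ 0xff, 8 },	/* may use any counter */
	};
	int assign[3];

	/*
	 * With exclusive (HT workaround) events present on the core, the
	 * patch caps the budget at half the generic counters.
	 */
	int unsched = assign_events(c, 3, NUM_COUNTERS / 2, assign);

	printf("unscheduled: %d\n", unsched);
	for (int i = 0; i < 3; i++)
		printf("event %d -> counter %d\n", i, assign[i]);
	return 0;
}

Running the sketch assigns the pinned event first and the looser ones after it; with gpmax halved, at most four of the eight counters can be consumed by one thread, which is the sibling-starvation guard described in the comment added to x86_schedule_events().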