Diffstat (limited to 'arch/x86/kernel/apic/apic.c')
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 280
1 file changed, 182 insertions, 98 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f5291362da1a..9e2dd2b296cd 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -65,10 +65,10 @@ unsigned int num_processors;
 unsigned disabled_cpus;
 
 /* Processor that is doing the boot up */
-unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
 EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
 
-u8 boot_cpu_apic_version;
+u8 boot_cpu_apic_version __ro_after_init;
 
 /*
  * The highest APIC ID seen during enumeration.
@@ -85,13 +85,13 @@ physid_mask_t phys_cpu_present_map;
  * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
  * avoid undefined behaviour caused by sending INIT from AP to BSP.
  */
-static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID;
+static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;
 
 /*
  * This variable controls which CPUs receive external NMIs.  By default,
  * external NMIs are delivered only to the BSP.
  */
-static int apic_extnmi = APIC_EXTNMI_BSP;
+static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
 
 /*
  * Map cpu index to physical APIC ID
@@ -114,7 +114,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
 DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
 
 /* Local APIC was disabled by the BIOS and enabled by the kernel */
-static int enabled_via_apicbase;
+static int enabled_via_apicbase __ro_after_init;
 
 /*
  * Handle interrupt mode configuration register (IMCR).
@@ -172,23 +172,23 @@ static __init int setup_apicpmtimer(char *s)
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
-unsigned long mp_lapic_addr;
-int disable_apic;
+unsigned long mp_lapic_addr __ro_after_init;
+int disable_apic __ro_after_init;
 
 /* Disable local APIC timer from the kernel commandline or via dmi quirk */
 static int disable_apic_timer __initdata;
 
 /* Local APIC timer works in C2 */
-int local_apic_timer_c2_ok;
+int local_apic_timer_c2_ok __ro_after_init;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
 /*
  * Debug level, exported for io_apic.c
  */
-int apic_verbosity;
+int apic_verbosity __ro_after_init;
 
-int pic_mode;
+int pic_mode __ro_after_init;
 
 /* Have we found an MP table */
-int smp_found_config;
+int smp_found_config __ro_after_init;
 
 static struct resource lapic_resource = {
 	.name = "Local APIC",
@@ -199,7 +199,7 @@ unsigned int lapic_timer_period = 0;
 
 static void apic_pm_activate(void);
 
-static unsigned long apic_phys;
+static unsigned long apic_phys __ro_after_init;
 
 /*
  * Get the LAPIC version
@@ -590,21 +590,21 @@ static u32 skx_deadline_rev(void)
 static const struct x86_cpu_id deadline_match[] = {
 	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X,	hsx_deadline_rev),
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X,	0x0b000020),
-	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D,	bdx_deadline_rev),
+	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D,	bdx_deadline_rev),
 	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X,	skx_deadline_rev),
 
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE,	0x22),
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT,	0x20),
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_GT3E,	0x17),
+	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL,		0x22),
+	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L,	0x20),
+	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G,	0x17),
 
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_CORE,	0x25),
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_GT3E,	0x17),
+	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL,	0x25),
+	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G,	0x17),
 
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_MOBILE,	0xb2),
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_DESKTOP,	0xb2),
+	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L,	0xb2),
+	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE,		0xb2),
 
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_MOBILE,	0x52),
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_DESKTOP,	0x52),
+	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L,	0x52),
+	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE,		0x52),
 
 	{},
 };
@@ -722,7 +722,7 @@ static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
 static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
 
 /*
- * Temporary interrupt handler.
+ * Temporary interrupt handler and polled calibration function.
  */
 static void __init lapic_cal_handler(struct clock_event_device *dev)
 {
@@ -834,6 +834,10 @@ bool __init apic_needs_pit(void)
 	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return true;
 
+	/* Virt guests may lack ARAT, but still have DEADLINE */
+	if (!boot_cpu_has(X86_FEATURE_ARAT))
+		return true;
+
 	/* Deadline timer is based on TSC so no further PIT action required */
 	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
 		return false;
@@ -851,7 +855,8 @@ bool __init apic_needs_pit(void)
 static int __init calibrate_APIC_clock(void)
 {
 	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
-	void (*real_handler)(struct clock_event_device *dev);
+	u64 tsc_perj = 0, tsc_start = 0;
+	unsigned long jif_start;
 	unsigned long deltaj;
 	long delta, deltatsc;
 	int pm_referenced = 0;
@@ -878,28 +883,64 @@ static int __init calibrate_APIC_clock(void)
 	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
 		    "calibrating APIC timer ...\n");
 
+	/*
+	 * There are platforms w/o global clockevent devices. Instead of
+	 * making the calibration conditional on that, use a polling based
+	 * approach everywhere.
+	 */
 	local_irq_disable();
 
-	/* Replace the global interrupt handler */
-	real_handler = global_clock_event->event_handler;
-	global_clock_event->event_handler = lapic_cal_handler;
-
 	/*
 	 * Setup the APIC counter to maximum. There is no way the lapic
 	 * can underflow in the 100ms detection time frame
 	 */
 	__setup_APIC_LVTT(0xffffffff, 0, 0);
 
-	/* Let the interrupts run */
+	/*
+	 * Methods to terminate the calibration loop:
+	 *  1) Global clockevent if available (jiffies)
+	 *  2) TSC if available and frequency is known
+	 */
+	jif_start = READ_ONCE(jiffies);
+
+	if (tsc_khz) {
+		tsc_start = rdtsc();
+		tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
+	}
+
+	/*
+	 * Enable interrupts so the tick can fire, if a global
+	 * clockevent device is available
+	 */
 	local_irq_enable();
 
-	while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
-		cpu_relax();
+	while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
+		/* Wait for a tick to elapse */
+		while (1) {
+			if (tsc_khz) {
+				u64 tsc_now = rdtsc();
+				if ((tsc_now - tsc_start) >= tsc_perj) {
+					tsc_start += tsc_perj;
+					break;
+				}
+			} else {
+				unsigned long jif_now = READ_ONCE(jiffies);
 
-	local_irq_disable();
+				if (time_after(jif_now, jif_start)) {
+					jif_start = jif_now;
+					break;
+				}
+			}
+			cpu_relax();
+		}
+
+		/* Invoke the calibration routine */
+		local_irq_disable();
+		lapic_cal_handler(NULL);
+		local_irq_enable();
+	}
 
-	/* Restore the real event handler */
-	global_clock_event->event_handler = real_handler;
+	local_irq_disable();
 
 	/* Build delta t1-t2 as apic timer counts down */
 	delta = lapic_cal_t1 - lapic_cal_t2;
@@ -943,10 +984,11 @@ static int __init calibrate_APIC_clock(void)
 	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
 
 	/*
-	 * PM timer calibration failed or not turned on
-	 * so lets try APIC timer based calibration
+	 * PM timer calibration failed or not turned on so lets try APIC
+	 * timer based calibration, if a global clockevent device is
+	 * available.
 	 */
-	if (!pm_referenced) {
+	if (!pm_referenced && global_clock_event) {
 		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
 
 		/*
@@ -1182,25 +1224,38 @@ void clear_local_APIC(void)
 }
 
 /**
- * disable_local_APIC - clear and disable the local APIC
+ * apic_soft_disable - Clears and software disables the local APIC on hotplug
+ *
+ * Contrary to disable_local_APIC() this does not touch the enable bit in
+ * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
+ * bus would require a hardware reset as the APIC would lose track of bus
+ * arbitration. On systems with FSB delivery APICBASE could be disabled,
+ * but it has to be guaranteed that no interrupt is sent to the APIC while
+ * in that state and it's not clear from the SDM whether it still responds
+ * to INIT/SIPI messages. Stay on the safe side and use software disable.
  */
-void disable_local_APIC(void)
+void apic_soft_disable(void)
 {
-	unsigned int value;
-
-	/* APIC hasn't been mapped yet */
-	if (!x2apic_mode && !apic_phys)
-		return;
+	u32 value;
 
 	clear_local_APIC();
 
-	/*
-	 * Disable APIC (implies clearing of registers
-	 * for 82489DX!).
-	 */
+	/* Soft disable APIC (implies clearing of registers for 82489DX!). */
 	value = apic_read(APIC_SPIV);
 	value &= ~APIC_SPIV_APIC_ENABLED;
 	apic_write(APIC_SPIV, value);
+}
+
+/**
+ * disable_local_APIC - clear and disable the local APIC
+ */
+void disable_local_APIC(void)
+{
+	/* APIC hasn't been mapped yet */
+	if (!x2apic_mode && !apic_phys)
+		return;
+
+	apic_soft_disable();
 
 #ifdef CONFIG_X86_32
 	/*
@@ -1265,7 +1320,7 @@ void __init sync_Arb_IDs(void)
 			APIC_INT_LEVELTRIG | APIC_DM_INIT);
 }
 
-enum apic_intr_mode_id apic_intr_mode;
+enum apic_intr_mode_id apic_intr_mode __ro_after_init;
 
 static int __init apic_intr_mode_select(void)
 {
@@ -1453,54 +1508,72 @@ static void lapic_setup_esr(void)
 			oldvalue, value);
 }
 
-static void apic_pending_intr_clear(void)
+#define APIC_IR_REGS		APIC_ISR_NR
+#define APIC_IR_BITS		(APIC_IR_REGS * 32)
+#define APIC_IR_MAPSIZE		(APIC_IR_BITS / BITS_PER_LONG)
+
+union apic_ir {
+	unsigned long	map[APIC_IR_MAPSIZE];
+	u32		regs[APIC_IR_REGS];
+};
+
+static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
 {
-	long long max_loops = cpu_khz ? cpu_khz : 1000000;
-	unsigned long long tsc = 0, ntsc;
-	unsigned int queued;
-	unsigned long value;
-	int i, j, acked = 0;
+	int i, bit;
+
+	/* Read the IRRs */
+	for (i = 0; i < APIC_IR_REGS; i++)
+		irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
+
+	/* Read the ISRs */
+	for (i = 0; i < APIC_IR_REGS; i++)
+		isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
 
-	if (boot_cpu_has(X86_FEATURE_TSC))
-		tsc = rdtsc();
 	/*
-	 * After a crash, we no longer service the interrupts and a pending
-	 * interrupt from previous kernel might still have ISR bit set.
-	 *
-	 * Most probably by now CPU has serviced that pending interrupt and
-	 * it might not have done the ack_APIC_irq() because it thought,
-	 * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
-	 * does not clear the ISR bit and cpu thinks it has already serivced
-	 * the interrupt. Hence a vector might get locked. It was noticed
-	 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
+	 * If the ISR map is not empty. ACK the APIC and run another round
+	 * to verify whether a pending IRR has been unblocked and turned
+	 * into a ISR.
 	 */
-	do {
-		queued = 0;
-		for (i = APIC_ISR_NR - 1; i >= 0; i--)
-			queued |= apic_read(APIC_IRR + i*0x10);
-
-		for (i = APIC_ISR_NR - 1; i >= 0; i--) {
-			value = apic_read(APIC_ISR + i*0x10);
-			for_each_set_bit(j, &value, 32) {
-				ack_APIC_irq();
-				acked++;
-			}
-		}
-		if (acked > 256) {
-			pr_err("LAPIC pending interrupts after %d EOI\n", acked);
-			break;
-		}
-		if (queued) {
-			if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
-				ntsc = rdtsc();
-				max_loops = (long long)cpu_khz << 10;
-				max_loops -= ntsc - tsc;
-			} else {
-				max_loops--;
-			}
-		}
-	} while (queued && max_loops > 0);
-	WARN_ON(max_loops <= 0);
+	if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
+		/*
+		 * There can be multiple ISR bits set when a high priority
+		 * interrupt preempted a lower priority one. Issue an ACK
+		 * per set bit.
+		 */
+		for_each_set_bit(bit, isr->map, APIC_IR_BITS)
+			ack_APIC_irq();
+		return true;
+	}
+
+	return !bitmap_empty(irr->map, APIC_IR_BITS);
+}
+
+/*
+ * After a crash, we no longer service the interrupts and a pending
+ * interrupt from previous kernel might still have ISR bit set.
+ *
+ * Most probably by now the CPU has serviced that pending interrupt and it
+ * might not have done the ack_APIC_irq() because it thought, interrupt
+ * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
+ * the ISR bit and cpu thinks it has already serivced the interrupt. Hence
+ * a vector might get locked. It was noticed for timer irq (vector
+ * 0x31). Issue an extra EOI to clear ISR.
+ *
+ * If there are pending IRR bits they turn into ISR bits after a higher
+ * priority ISR bit has been acked.
+ */
+static void apic_pending_intr_clear(void)
+{
+	union apic_ir irr, isr;
+	unsigned int i;
+
+	/* 512 loops are way oversized and give the APIC a chance to obey. */
+	for (i = 0; i < 512; i++) {
+		if (!apic_check_and_ack(&irr, &isr))
+			return;
+	}
+	/* Dump the IRR/ISR content if that failed */
+	pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
 }
 
 /**
@@ -1517,12 +1590,19 @@ static void setup_local_APIC(void)
 	int logical_apicid, ldr_apicid;
 #endif
 
-
 	if (disable_apic) {
 		disable_ioapic_support();
 		return;
 	}
 
+	/*
+	 * If this comes from kexec/kcrash the APIC might be enabled in
+	 * SPIV. Soft disable it before doing further initialization.
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_SPIV_APIC_ENABLED;
+	apic_write(APIC_SPIV, value);
+
 #ifdef CONFIG_X86_32
 	/* Pound the ESR really hard over the head with a big hammer - mbligh */
 	if (lapic_is_integrated() && apic->disable_esr) {
@@ -1532,8 +1612,6 @@ static void setup_local_APIC(void)
 		apic_write(APIC_ESR, 0);
 	}
 #endif
-	perf_events_lapic_init();
-
 	/*
 	 * Double-check whether this APIC is really registered.
 	 * This is meaningless in clustered apic mode, so we skip it.
@@ -1561,13 +1639,17 @@ static void setup_local_APIC(void)
 #endif
 
 	/*
-	 * Set Task Priority to 'accept all'. We never change this
-	 * later on.
+	 * Set Task Priority to 'accept all except vectors 0-31'.  An APIC
+	 * vector in the 16-31 range could be delivered if TPR == 0, but we
+	 * would think it's an exception and terrible things will happen.  We
+	 * never change this later on.
 	 */
 	value = apic_read(APIC_TASKPRI);
 	value &= ~APIC_TPRI_MASK;
+	value |= 0x10;
 	apic_write(APIC_TASKPRI, value);
 
+	/* Clear eventually stale ISR/IRR bits */
 	apic_pending_intr_clear();
 
 	/*
@@ -1614,6 +1696,8 @@ static void setup_local_APIC(void)
 	value |= SPURIOUS_APIC_VECTOR;
 	apic_write(APIC_SPIV, value);
 
+	perf_events_lapic_init();
+
 	/*
 	 * Set up LVT0, LVT1:
 	 *
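Editor's note: the reworked calibrate_APIC_clock() above no longer borrows the global clockevent's handler; it busy-waits for one tick per calibration sample, preferring the TSC when its frequency is already known and falling back to jiffies otherwise. The following user-space sketch mirrors only that wait-for-a-tick loop. It is not kernel code: fake_jiffies(), the HZ value and the use of __rdtsc() are illustrative stand-ins for jiffies, CONFIG_HZ and rdtsc().

/* Sketch of the polled "wait for one tick" termination used in the patch.
 * Assumption: user space, x86, GCC/Clang with <x86intrin.h> available. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <x86intrin.h>

#define HZ 100				/* pretend tick rate, like CONFIG_HZ=100 */

static uint64_t tsc_khz_known;		/* 0 means "TSC frequency unknown" */

/* Stand-in for READ_ONCE(jiffies): derive a tick count from CLOCK_MONOTONIC */
static unsigned long fake_jiffies(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long)(ts.tv_sec * HZ + ts.tv_nsec / (1000000000L / HZ));
}

/* Busy-wait until one tick (1/HZ s) has elapsed, using the TSC when its
 * frequency is known, else the jiffies-like counter; mirrors the inner
 * while(1) loop of the patched calibrate_APIC_clock(). */
static void wait_one_tick(uint64_t *tsc_start, uint64_t tsc_perj,
			  unsigned long *jif_start)
{
	for (;;) {
		if (tsc_khz_known) {
			uint64_t tsc_now = __rdtsc();

			if (tsc_now - *tsc_start >= tsc_perj) {
				*tsc_start += tsc_perj;
				return;
			}
		} else {
			unsigned long jif_now = fake_jiffies();

			if (jif_now != *jif_start) {	/* time_after() analogue */
				*jif_start = jif_now;
				return;
			}
		}
	}
}

int main(void)
{
	unsigned long jif_start = fake_jiffies();
	uint64_t tsc_start = __rdtsc();
	uint64_t tsc_perj = tsc_khz_known * 1000ULL / HZ;	/* TSC cycles per tick */

	for (int loops = 0; loops < 5; loops++) {
		wait_one_tick(&tsc_start, tsc_perj, &jif_start);
		printf("tick %d elapsed\n", loops);	/* calibration sample point */
	}
	return 0;
}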
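Editor's note: apic_pending_intr_clear() now delegates to apic_check_and_ack(), which snapshots the IRR/ISR registers, issues one EOI per set ISR bit and reports whether anything is still pending; the caller retries a bounded 512 times before warning. The sketch below reproduces that control flow in user space. The fake_* helpers are hypothetical stand-ins for apic_read() and ack_APIC_irq(), chosen only to make the example self-contained.

/* Sketch of the bounded check-and-ack retry pattern from the patch.
 * Assumption: the fake_* register model below replaces real APIC access. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define IR_REGS 8			/* 8 x 32-bit registers = 256 vectors */

static uint32_t fake_isr[IR_REGS] = { 0x00000002 };	/* pretend vector 1 is stuck */
static uint32_t fake_irr[IR_REGS];

/* Stand-in for ack_APIC_irq(): one EOI clears the lowest pending ISR bit */
static void fake_ack(void)
{
	for (int i = 0; i < IR_REGS; i++) {
		if (fake_isr[i]) {
			fake_isr[i] &= fake_isr[i] - 1;
			return;
		}
	}
}

/* Snapshot IRR/ISR; if any ISR bit is set, EOI once per set bit and report
 * "run another round"; otherwise report whether IRR still has pending bits. */
static bool check_and_ack(void)
{
	bool isr_set = false, irr_set = false;

	for (int i = 0; i < IR_REGS; i++) {
		if (fake_isr[i])
			isr_set = true;
		if (fake_irr[i])
			irr_set = true;
	}

	if (isr_set) {
		for (int i = 0; i < IR_REGS; i++)
			for (uint32_t m = fake_isr[i]; m; m &= m - 1)
				fake_ack();
		return true;
	}
	return irr_set;
}

int main(void)
{
	/* Bounded retry, like the 512-iteration loop in apic_pending_intr_clear() */
	for (int i = 0; i < 512; i++) {
		if (!check_and_ack()) {
			puts("APIC state clean");
			return 0;
		}
	}
	puts("stale IRR/ISR bits remain");
	return 1;
}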