diff options
Diffstat (limited to 'arch/x86/kernel/apic/apic.c')
| -rw-r--r-- | arch/x86/kernel/apic/apic.c | 208 | 
1 files changed, 125 insertions, 83 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 08fb79f37793..9e2dd2b296cd 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -65,10 +65,10 @@ unsigned int num_processors;  unsigned disabled_cpus;  /* Processor that is doing the boot up */ -unsigned int boot_cpu_physical_apicid = -1U; +unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;  EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid); -u8 boot_cpu_apic_version; +u8 boot_cpu_apic_version __ro_after_init;  /*   * The highest APIC ID seen during enumeration. @@ -85,13 +85,13 @@ physid_mask_t phys_cpu_present_map;   * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to   * avoid undefined behaviour caused by sending INIT from AP to BSP.   */ -static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID; +static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;  /*   * This variable controls which CPUs receive external NMIs.  By default,   * external NMIs are delivered only to the BSP.   */ -static int apic_extnmi = APIC_EXTNMI_BSP; +static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;  /*   * Map cpu index to physical APIC ID @@ -114,7 +114,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);  DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);  /* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; +static int enabled_via_apicbase __ro_after_init;  /*   * Handle interrupt mode configuration register (IMCR). 
@@ -172,23 +172,23 @@ static __init int setup_apicpmtimer(char *s)  __setup("apicpmtimer", setup_apicpmtimer);  #endif -unsigned long mp_lapic_addr; -int disable_apic; +unsigned long mp_lapic_addr __ro_after_init; +int disable_apic __ro_after_init;  /* Disable local APIC timer from the kernel commandline or via dmi quirk */  static int disable_apic_timer __initdata;  /* Local APIC timer works in C2 */ -int local_apic_timer_c2_ok; +int local_apic_timer_c2_ok __ro_after_init;  EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);  /*   * Debug level, exported for io_apic.c   */ -int apic_verbosity; +int apic_verbosity __ro_after_init; -int pic_mode; +int pic_mode __ro_after_init;  /* Have we found an MP table */ -int smp_found_config; +int smp_found_config __ro_after_init;  static struct resource lapic_resource = {  	.name = "Local APIC", @@ -199,7 +199,7 @@ unsigned int lapic_timer_period = 0;  static void apic_pm_activate(void); -static unsigned long apic_phys; +static unsigned long apic_phys __ro_after_init;  /*   * Get the LAPIC version @@ -590,21 +590,21 @@ static u32 skx_deadline_rev(void)  static const struct x86_cpu_id deadline_match[] = {  	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X,	hsx_deadline_rev),  	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X,	0x0b000020), -	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D,	bdx_deadline_rev), +	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D,	bdx_deadline_rev),  	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X,	skx_deadline_rev), -	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE,	0x22), -	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT,	0x20), -	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_GT3E,	0x17), +	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL,		0x22), +	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L,	0x20), +	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G,	0x17), -	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_CORE,	0x25), -	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_GT3E,	0x17), +	
DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL,	0x25), +	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G,	0x17), -	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_MOBILE,	0xb2), -	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_DESKTOP,	0xb2), +	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L,	0xb2), +	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE,		0xb2), -	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_MOBILE,	0x52), -	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_DESKTOP,	0x52), +	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L,	0x52), +	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE,		0x52),  	{},  }; @@ -1224,25 +1224,38 @@ void clear_local_APIC(void)  }  /** - * disable_local_APIC - clear and disable the local APIC + * apic_soft_disable - Clears and software disables the local APIC on hotplug + * + * Contrary to disable_local_APIC() this does not touch the enable bit in + * MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC + * bus would require a hardware reset as the APIC would lose track of bus + * arbitration. On systems with FSB delivery APICBASE could be disabled, + * but it has to be guaranteed that no interrupt is sent to the APIC while + * in that state and it's not clear from the SDM whether it still responds + * to INIT/SIPI messages. Stay on the safe side and use software disable.   */ -void disable_local_APIC(void) +void apic_soft_disable(void)  { -	unsigned int value; - -	/* APIC hasn't been mapped yet */ -	if (!x2apic_mode && !apic_phys) -		return; +	u32 value;  	clear_local_APIC(); -	/* -	 * Disable APIC (implies clearing of registers -	 * for 82489DX!). -	 */ +	/* Soft disable APIC (implies clearing of registers for 82489DX!). 
*/  	value = apic_read(APIC_SPIV);  	value &= ~APIC_SPIV_APIC_ENABLED;  	apic_write(APIC_SPIV, value); +} + +/** + * disable_local_APIC - clear and disable the local APIC + */ +void disable_local_APIC(void) +{ +	/* APIC hasn't been mapped yet */ +	if (!x2apic_mode && !apic_phys) +		return; + +	apic_soft_disable();  #ifdef CONFIG_X86_32  	/* @@ -1307,7 +1320,7 @@ void __init sync_Arb_IDs(void)  			APIC_INT_LEVELTRIG | APIC_DM_INIT);  } -enum apic_intr_mode_id apic_intr_mode; +enum apic_intr_mode_id apic_intr_mode __ro_after_init;  static int __init apic_intr_mode_select(void)  { @@ -1495,54 +1508,72 @@ static void lapic_setup_esr(void)  			oldvalue, value);  } -static void apic_pending_intr_clear(void) +#define APIC_IR_REGS		APIC_ISR_NR +#define APIC_IR_BITS		(APIC_IR_REGS * 32) +#define APIC_IR_MAPSIZE		(APIC_IR_BITS / BITS_PER_LONG) + +union apic_ir { +	unsigned long	map[APIC_IR_MAPSIZE]; +	u32		regs[APIC_IR_REGS]; +}; + +static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)  { -	long long max_loops = cpu_khz ? cpu_khz : 1000000; -	unsigned long long tsc = 0, ntsc; -	unsigned int queued; -	unsigned long value; -	int i, j, acked = 0; +	int i, bit; + +	/* Read the IRRs */ +	for (i = 0; i < APIC_IR_REGS; i++) +		irr->regs[i] = apic_read(APIC_IRR + i * 0x10); + +	/* Read the ISRs */ +	for (i = 0; i < APIC_IR_REGS; i++) +		isr->regs[i] = apic_read(APIC_ISR + i * 0x10); -	if (boot_cpu_has(X86_FEATURE_TSC)) -		tsc = rdtsc();  	/* -	 * After a crash, we no longer service the interrupts and a pending -	 * interrupt from previous kernel might still have ISR bit set. -	 * -	 * Most probably by now CPU has serviced that pending interrupt and -	 * it might not have done the ack_APIC_irq() because it thought, -	 * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it -	 * does not clear the ISR bit and cpu thinks it has already serivced -	 * the interrupt. Hence a vector might get locked. It was noticed -	 * for timer irq (vector 0x31). 
Issue an extra EOI to clear ISR. +	 * If the ISR map is not empty, ACK the APIC and run another round +	 * to verify whether a pending IRR has been unblocked and turned +	 * into an ISR.  	 */ -	do { -		queued = 0; -		for (i = APIC_ISR_NR - 1; i >= 0; i--) -			queued |= apic_read(APIC_IRR + i*0x10); - -		for (i = APIC_ISR_NR - 1; i >= 0; i--) { -			value = apic_read(APIC_ISR + i*0x10); -			for_each_set_bit(j, &value, 32) { -				ack_APIC_irq(); -				acked++; -			} -		} -		if (acked > 256) { -			pr_err("LAPIC pending interrupts after %d EOI\n", acked); -			break; -		} -		if (queued) { -			if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) { -				ntsc = rdtsc(); -				max_loops = (long long)cpu_khz << 10; -				max_loops -= ntsc - tsc; -			} else { -				max_loops--; -			} -		} -	} while (queued && max_loops > 0); -	WARN_ON(max_loops <= 0); +	if (!bitmap_empty(isr->map, APIC_IR_BITS)) { +		/* +		 * There can be multiple ISR bits set when a high priority +		 * interrupt preempted a lower priority one. Issue an ACK +		 * per set bit. +		 */ +		for_each_set_bit(bit, isr->map, APIC_IR_BITS) +			ack_APIC_irq(); +		return true; +	} + +	return !bitmap_empty(irr->map, APIC_IR_BITS); +} + +/* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now the CPU has serviced that pending interrupt and it + * might not have done the ack_APIC_irq() because it thought, interrupt + * came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear + * the ISR bit and cpu thinks it has already serviced the interrupt. Hence + * a vector might get locked. It was noticed for timer irq (vector + * 0x31). Issue an extra EOI to clear ISR. + * + * If there are pending IRR bits they turn into ISR bits after a higher + * priority ISR bit has been acked. 
+ */ +static void apic_pending_intr_clear(void) +{ +	union apic_ir irr, isr; +	unsigned int i; + +	/* 512 loops are way oversized and give the APIC a chance to obey. */ +	for (i = 0; i < 512; i++) { +		if (!apic_check_and_ack(&irr, &isr)) +			return; +	} +	/* Dump the IRR/ISR content if that failed */ +	pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);  }  /** @@ -1559,12 +1590,19 @@ static void setup_local_APIC(void)  	int logical_apicid, ldr_apicid;  #endif -  	if (disable_apic) {  		disable_ioapic_support();  		return;  	} +	/* +	 * If this comes from kexec/kcrash the APIC might be enabled in +	 * SPIV. Soft disable it before doing further initialization. +	 */ +	value = apic_read(APIC_SPIV); +	value &= ~APIC_SPIV_APIC_ENABLED; +	apic_write(APIC_SPIV, value); +  #ifdef CONFIG_X86_32  	/* Pound the ESR really hard over the head with a big hammer - mbligh */  	if (lapic_is_integrated() && apic->disable_esr) { @@ -1574,8 +1612,6 @@ static void setup_local_APIC(void)  		apic_write(APIC_ESR, 0);  	}  #endif -	perf_events_lapic_init(); -  	/*  	 * Double-check whether this APIC is really registered.  	 * This is meaningless in clustered apic mode, so we skip it. @@ -1603,13 +1639,17 @@ static void setup_local_APIC(void)  #endif  	/* -	 * Set Task Priority to 'accept all'. We never change this -	 * later on. +	 * Set Task Priority to 'accept all except vectors 0-31'.  An APIC +	 * vector in the 16-31 range could be delivered if TPR == 0, but we +	 * would think it's an exception and terrible things will happen.  We +	 * never change this later on.  	 */  	value = apic_read(APIC_TASKPRI);  	value &= ~APIC_TPRI_MASK; +	value |= 0x10;  	apic_write(APIC_TASKPRI, value); +	/* Clear possibly stale ISR/IRR bits */  	apic_pending_intr_clear();  	/* @@ -1656,6 +1696,8 @@ static void setup_local_APIC(void)  	value |= SPURIOUS_APIC_VECTOR;  	apic_write(APIC_SPIV, value); +	perf_events_lapic_init(); +  	/*  	 * Set up LVT0, LVT1:  	 *  |