diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 68 | ||||
-rw-r--r-- | arch/x86/kernel/apic/bigsmp_32.c | 24 | ||||
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/apic/probe_32.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/amd.c | 66 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/umwait.c | 39 | ||||
-rw-r--r-- | arch/x86/kernel/uprobes.c | 17 |
7 files changed, 178 insertions, 47 deletions
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f5291362da1a..aa5495d0f478 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -722,7 +722,7 @@ static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; /* - * Temporary interrupt handler. + * Temporary interrupt handler and polled calibration function. */ static void __init lapic_cal_handler(struct clock_event_device *dev) { @@ -851,7 +851,8 @@ bool __init apic_needs_pit(void) static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = this_cpu_ptr(&lapic_events); - void (*real_handler)(struct clock_event_device *dev); + u64 tsc_perj = 0, tsc_start = 0; + unsigned long jif_start; unsigned long deltaj; long delta, deltatsc; int pm_referenced = 0; @@ -878,28 +879,64 @@ static int __init calibrate_APIC_clock(void) apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" "calibrating APIC timer ...\n"); + /* + * There are platforms w/o global clockevent devices. Instead of + * making the calibration conditional on that, use a polling based + * approach everywhere. + */ local_irq_disable(); - /* Replace the global interrupt handler */ - real_handler = global_clock_event->event_handler; - global_clock_event->event_handler = lapic_cal_handler; - /* * Setup the APIC counter to maximum. There is no way the lapic * can underflow in the 100ms detection time frame */ __setup_APIC_LVTT(0xffffffff, 0, 0); - /* Let the interrupts run */ + /* + * Methods to terminate the calibration loop: + * 1) Global clockevent if available (jiffies) + * 2) TSC if available and frequency is known + */ + jif_start = READ_ONCE(jiffies); + + if (tsc_khz) { + tsc_start = rdtsc(); + tsc_perj = div_u64((u64)tsc_khz * 1000, HZ); + } + + /* + * Enable interrupts so the tick can fire, if a global + * clockevent device is available + */ local_irq_enable(); - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) { + /* Wait for a tick to elapse */ + while (1) { + if (tsc_khz) { + u64 tsc_now = rdtsc(); + if ((tsc_now - tsc_start) >= tsc_perj) { + tsc_start += tsc_perj; + break; + } + } else { + unsigned long jif_now = READ_ONCE(jiffies); - local_irq_disable(); + if (time_after(jif_now, jif_start)) { + jif_start = jif_now; + break; + } + } + cpu_relax(); + } - /* Restore the real event handler */ - global_clock_event->event_handler = real_handler; + /* Invoke the calibration routine */ + local_irq_disable(); + lapic_cal_handler(NULL); + local_irq_enable(); + } + + local_irq_disable(); /* Build delta t1-t2 as apic timer counts down */ delta = lapic_cal_t1 - lapic_cal_t2; @@ -943,10 +980,11 @@ static int __init calibrate_APIC_clock(void) levt->features &= ~CLOCK_EVT_FEAT_DUMMY; /* - * PM timer calibration failed or not turned on - * so lets try APIC timer based calibration + * PM timer calibration failed or not turned on so lets try APIC + * timer based calibration, if a global clockevent device is + * available. */ - if (!pm_referenced) { + if (!pm_referenced && global_clock_event) { apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); /* diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index afee386ff711..caedd8d60d36 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c @@ -38,32 +38,12 @@ static int bigsmp_early_logical_apicid(int cpu) return early_per_cpu(x86_cpu_to_apicid, cpu); } -static inline unsigned long calculate_ldr(int cpu) -{ - unsigned long val, id; - - val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; - id = per_cpu(x86_bios_cpu_apicid, cpu); - val |= SET_APIC_LOGICAL_ID(id); - - return val; -} - /* - * Set up the logical destination ID. - * - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... + * bigsmp enables physical destination mode + * and doesn't use LDR and DFR */ static void bigsmp_init_apic_ldr(void) { - unsigned long val; - int cpu = smp_processor_id(); - - apic_write(APIC_DFR, APIC_DFR_FLAT); - val = calculate_ldr(cpu); - apic_write(APIC_LDR, val); } static void bigsmp_setup_apic_routing(void) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c7bb6c69f21c..d6af97fd170a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2438,7 +2438,13 @@ unsigned int arch_dynirq_lower_bound(unsigned int from) * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use * gsi_top if ioapic_dynirq_base hasn't been initialized yet. */ - return ioapic_initialized ? ioapic_dynirq_base : gsi_top; + if (!ioapic_initialized) + return gsi_top; + /* + * For DT enabled machines ioapic_dynirq_base is irrelevant and not + * updated. So simply return @from if ioapic_dynirq_base == 0. + */ + return ioapic_dynirq_base ? : from; } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1492799b8f43..ee2d91e382f1 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -184,7 +184,8 @@ void __init default_setup_apic_routing(void) def_to_bigsmp = 0; break; } - /* If P4 and above fall through */ + /* P4 and above */ + /* fall through */ case X86_VENDOR_HYGON: case X86_VENDOR_AMD: def_to_bigsmp = 1; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8d4e50428b68..68c363c341bf 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -804,6 +804,64 @@ static void init_amd_ln(struct cpuinfo_x86 *c) msr_set_bit(MSR_AMD64_DE_CFG, 31); } +static bool rdrand_force; + +static int __init rdrand_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "force")) + rdrand_force = true; + else + return -EINVAL; + + return 0; +} +early_param("rdrand", rdrand_cmdline); + +static void clear_rdrand_cpuid_bit(struct cpuinfo_x86 *c) +{ + /* + * Saving of the MSR used to hide the RDRAND support during + * suspend/resume is done by arch/x86/power/cpu.c, which is + * dependent on CONFIG_PM_SLEEP. + */ + if (!IS_ENABLED(CONFIG_PM_SLEEP)) + return; + + /* + * The nordrand option can clear X86_FEATURE_RDRAND, so check for + * RDRAND support using the CPUID function directly. + */ + if (!(cpuid_ecx(1) & BIT(30)) || rdrand_force) + return; + + msr_clear_bit(MSR_AMD64_CPUID_FN_1, 62); + + /* + * Verify that the CPUID change has occurred in case the kernel is + * running virtualized and the hypervisor doesn't support the MSR. + */ + if (cpuid_ecx(1) & BIT(30)) { + pr_info_once("BIOS may not properly restore RDRAND after suspend, but hypervisor does not support hiding RDRAND via CPUID.\n"); + return; + } + + clear_cpu_cap(c, X86_FEATURE_RDRAND); + pr_info_once("BIOS may not properly restore RDRAND after suspend, hiding RDRAND via CPUID. Use rdrand=force to reenable.\n"); +} + +static void init_amd_jg(struct cpuinfo_x86 *c) +{ + /* + * Some BIOS implementations do not restore proper RDRAND support + * across suspend and resume. Check on whether to hide the RDRAND + * instruction support via CPUID. + */ + clear_rdrand_cpuid_bit(c); +} + static void init_amd_bd(struct cpuinfo_x86 *c) { u64 value; @@ -818,6 +876,13 @@ static void init_amd_bd(struct cpuinfo_x86 *c) wrmsrl_safe(MSR_F15H_IC_CFG, value); } } + + /* + * Some BIOS implementations do not restore proper RDRAND support + * across suspend and resume. Check on whether to hide the RDRAND + * instruction support via CPUID. + */ + clear_rdrand_cpuid_bit(c); } static void init_amd_zn(struct cpuinfo_x86 *c) @@ -860,6 +925,7 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x10: init_amd_gh(c); break; case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; + case 0x16: init_amd_jg(c); break; case 0x17: init_amd_zn(c); break; } diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c index 6a204e7336c1..32b4dc9030aa 100644 --- a/arch/x86/kernel/cpu/umwait.c +++ b/arch/x86/kernel/cpu/umwait.c @@ -18,6 +18,12 @@ static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE); /* + * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by + * hardware or BIOS before kernel boot. + */ +static u32 orig_umwait_control_cached __ro_after_init; + +/* * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in * the sysfs write functions. */ @@ -53,6 +59,23 @@ static int umwait_cpu_online(unsigned int cpu) } /* + * The CPU hotplug callback sets the control MSR to the original control + * value. + */ +static int umwait_cpu_offline(unsigned int cpu) +{ + /* + * This code is protected by the CPU hotplug already and + * orig_umwait_control_cached is never changed after it caches + * the original control MSR value in umwait_init(). So there + * is no race condition here. + */ + wrmsr(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached, 0); + + return 0; +} + +/* * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which * is the only active CPU at this time. The MSR is set up on the APs via the * CPU hotplug callback. @@ -185,8 +208,22 @@ static int __init umwait_init(void) if (!boot_cpu_has(X86_FEATURE_WAITPKG)) return -ENODEV; + /* + * Cache the original control MSR value before the control MSR is + * changed. This is the only place where orig_umwait_control_cached + * is modified. + */ + rdmsrl(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached); + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online", - umwait_cpu_online, NULL); + umwait_cpu_online, umwait_cpu_offline); + if (ret < 0) { + /* + * On failure, the control MSR on all CPUs has the + * original control value. + */ + return ret; + } register_syscore_ops(&umwait_syscore_ops); diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index d8359ebeea70..8cd745ef8c7b 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -508,9 +508,12 @@ struct uprobe_xol_ops { void (*abort)(struct arch_uprobe *, struct pt_regs *); }; -static inline int sizeof_long(void) +static inline int sizeof_long(struct pt_regs *regs) { - return in_ia32_syscall() ? 4 : 8; + /* + * Check registers for mode as in_xxx_syscall() does not apply here. + */ + return user_64bit_mode(regs) ? 8 : 4; } static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) @@ -521,9 +524,9 @@ static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) static int emulate_push_stack(struct pt_regs *regs, unsigned long val) { - unsigned long new_sp = regs->sp - sizeof_long(); + unsigned long new_sp = regs->sp - sizeof_long(regs); - if (copy_to_user((void __user *)new_sp, &val, sizeof_long())) + if (copy_to_user((void __user *)new_sp, &val, sizeof_long(regs))) return -EFAULT; regs->sp = new_sp; @@ -556,7 +559,7 @@ static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs long correction = utask->vaddr - utask->xol_vaddr; regs->ip += correction; } else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) { - regs->sp += sizeof_long(); /* Pop incorrect return address */ + regs->sp += sizeof_long(regs); /* Pop incorrect return address */ if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen)) return -ERESTART; } @@ -675,7 +678,7 @@ static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) * "call" insn was executed out-of-line. Just restore ->sp and restart. * We could also restore ->ip and try to call branch_emulate_op() again. */ - regs->sp += sizeof_long(); + regs->sp += sizeof_long(regs); return -ERESTART; } @@ -1056,7 +1059,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs) { - int rasize = sizeof_long(), nleft; + int rasize = sizeof_long(regs), nleft; unsigned long orig_ret_vaddr = 0; /* clear high bits for 32-bit apps */ if (copy_from_user(&orig_ret_vaddr, (void __user *)regs->sp, rasize)) |