Diffstat (limited to 'arch/x86')
41 files changed, 293 insertions, 195 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2fdb23313dd5..9bceea6a5852 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -56,7 +56,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_PMEM_API if X86_64 # Causing hangs/crashes, see the commit that added this change for details. - select ARCH_HAS_REFCOUNT if BROKEN + select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 03505ffbe1b6..d7d3cc24baf4 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -75,7 +75,7 @@ static long syscall_trace_enter(struct pt_regs *regs) if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current)); - work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; + work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; if (unlikely(work & _TIF_SYSCALL_EMU)) emulated = true; @@ -186,9 +186,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) addr_limit_user_check(); - if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) - local_irq_disable(); - + lockdep_assert_irqs_disabled(); lockdep_sys_exit(); cached_flags = READ_ONCE(ti->flags); diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index fa8dbfcf7ed3..11b13c4b43d5 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -318,7 +318,7 @@ int gettimeofday(struct timeval *, struct timezone *) notrace time_t __vdso_time(time_t *t) { /* This is atomic on x86 so we don't need any locks. */ - time_t result = ACCESS_ONCE(gtod->wall_time_sec); + time_t result = READ_ONCE(gtod->wall_time_sec); if (t) *t = result; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 80534d3c2480..140d33288e78 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2118,7 +2118,7 @@ static int x86_pmu_event_init(struct perf_event *event) event->destroy(event); } - if (ACCESS_ONCE(x86_pmu.attr_rdpmc)) + if (READ_ONCE(x86_pmu.attr_rdpmc)) event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; return err; @@ -2371,7 +2371,7 @@ static unsigned long get_segment_base(unsigned int segment) struct ldt_struct *ldt; /* IRQs are off, so this synchronizes with smp_store_release */ - ldt = lockless_dereference(current->active_mm->context.ldt); + ldt = READ_ONCE(current->active_mm->context.ldt); if (!ldt || idx >= ldt->nr_entries) return 0; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 9fb9a1f1e47b..43445da30cea 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event) if (event->attr.use_clockid) flags &= ~PERF_SAMPLE_TIME; + if (!event->attr.exclude_kernel) + flags &= ~PERF_SAMPLE_REGS_USER; + if (event->attr.sample_regs_user & ~PEBS_REGS) + flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); return flags; } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 4196f81ec0e1..f7aaadf9331f 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -85,13 +85,15 @@ struct amd_nb { * Flags PEBS can handle without an PMI. * * TID can only be handled by flushing at context switch. + * REGS_USER can be handled for events limited to ring 3. 
* */ #define PEBS_FREERUNNING_FLAGS \ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \ - PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR) + PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER) /* * A debug store configuration. @@ -110,6 +112,26 @@ struct debug_store { u64 pebs_event_reset[MAX_PEBS_EVENTS]; }; +#define PEBS_REGS \ + (PERF_REG_X86_AX | \ + PERF_REG_X86_BX | \ + PERF_REG_X86_CX | \ + PERF_REG_X86_DX | \ + PERF_REG_X86_DI | \ + PERF_REG_X86_SI | \ + PERF_REG_X86_SP | \ + PERF_REG_X86_BP | \ + PERF_REG_X86_IP | \ + PERF_REG_X86_FLAGS | \ + PERF_REG_X86_R8 | \ + PERF_REG_X86_R9 | \ + PERF_REG_X86_R10 | \ + PERF_REG_X86_R11 | \ + PERF_REG_X86_R12 | \ + PERF_REG_X86_R13 | \ + PERF_REG_X86_R14 | \ + PERF_REG_X86_R15) + /* * Per register state. */ diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 01727dbc294a..7fb336210e1b 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -12,11 +12,11 @@ */ #ifdef CONFIG_X86_32 -#define mb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "mfence", \ +#define mb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") -#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "lfence", \ +#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") -#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,0(%%esp)", "sfence", \ +#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \ X86_FEATURE_XMM2) ::: "memory", "cc") #else #define mb() asm volatile("mfence":::"memory") @@ -31,7 +31,11 @@ #endif #define dma_wmb() barrier() -#define __smp_mb() mb() +#ifdef CONFIG_X86_32 +#define __smp_mb() asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc") +#else +#define __smp_mb() asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc") +#endif #define __smp_rmb() dma_rmb() #define __smp_wmb() barrier() #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index c1a125e47ff3..3a091cea36c5 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -253,7 +253,7 @@ extern int force_personality32; * space open for things that want to use the area for 32-bit pointers. */ #define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ - (TASK_SIZE / 3 * 2)) + (DEFAULT_MAP_WINDOW / 3 * 2)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. 
This could be done in user space, diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 6cf65437b5e5..9f2e3102e0bb 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -58,8 +58,8 @@ extern __visible kprobe_opcode_t optprobe_template_call[]; extern __visible kprobe_opcode_t optprobe_template_end[]; #define MAX_OPTIMIZED_LENGTH (MAX_INSN_SIZE + RELATIVE_ADDR_SIZE) #define MAX_OPTINSN_SIZE \ - (((unsigned long)&optprobe_template_end - \ - (unsigned long)&optprobe_template_entry) + \ + (((unsigned long)optprobe_template_end - \ + (unsigned long)optprobe_template_entry) + \ MAX_OPTIMIZED_LENGTH + RELATIVEJUMP_SIZE) extern const int kretprobe_blacklist_size; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 6699fc441644..6d16d15d09a0 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -73,8 +73,8 @@ static inline void load_mm_ldt(struct mm_struct *mm) #ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; - /* lockless_dereference synchronizes with smp_store_release */ - ldt = lockless_dereference(mm->context.ldt); + /* READ_ONCE synchronizes with smp_store_release */ + ldt = READ_ONCE(mm->context.ldt); /* * Any change to mm->context.ldt is followed by an IPI to all diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 9982dd96f093..5e16b5d40d32 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_QSPINLOCK_H #define _ASM_X86_QSPINLOCK_H +#include <linux/jump_label.h> #include <asm/cpufeature.h> #include <asm-generic/qspinlock_types.h> #include <asm/paravirt.h> @@ -47,10 +48,14 @@ static inline void queued_spin_unlock(struct qspinlock *lock) #endif #ifdef CONFIG_PARAVIRT +DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); + +void native_pv_lock_init(void) __init; + #define virt_spin_lock virt_spin_lock static inline bool virt_spin_lock(struct qspinlock *lock) { - if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + if (!static_branch_likely(&virt_spin_lock_key)) return false; /* @@ -66,6 +71,10 @@ static inline bool virt_spin_lock(struct qspinlock *lock) return true; } +#else +static inline void native_pv_lock_init(void) +{ +} #endif /* CONFIG_PARAVIRT */ #include <asm-generic/qspinlock.h> diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h index ff871210b9f2..4e44250e7d0d 100644 --- a/arch/x86/include/asm/refcount.h +++ b/arch/x86/include/asm/refcount.h @@ -15,7 +15,7 @@ * back to the regular execution flow in .text. 
*/ #define _REFCOUNT_EXCEPTION \ - ".pushsection .text.unlikely\n" \ + ".pushsection .text..refcount\n" \ "111:\tlea %[counter], %%" _ASM_CX "\n" \ "112:\t" ASM_UD0 "\n" \ ASM_UNREACHABLE \ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 4d38d85a16ad..4c25cf6caefa 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -61,18 +61,33 @@ /* * lock for reading */ +#define ____down_read(sem, slow_path) \ +({ \ + struct rw_semaphore* ret; \ + asm volatile("# beginning down_read\n\t" \ + LOCK_PREFIX _ASM_INC "(%[sem])\n\t" \ + /* adds 0x00000001 */ \ + " jns 1f\n" \ + " call " slow_path "\n" \ + "1:\n\t" \ + "# ending down_read\n\t" \ + : "+m" (sem->count), "=a" (ret), \ + ASM_CALL_CONSTRAINT \ + : [sem] "a" (sem) \ + : "memory", "cc"); \ + ret; \ +}) + static inline void __down_read(struct rw_semaphore *sem) { - asm volatile("# beginning down_read\n\t" - LOCK_PREFIX _ASM_INC "(%1)\n\t" - /* adds 0x00000001 */ - " jns 1f\n" - " call call_rwsem_down_read_failed\n" - "1:\n\t" - "# ending down_read\n\t" - : "+m" (sem->count) - : "a" (sem) - : "memory", "cc"); + ____down_read(sem, "call_rwsem_down_read_failed"); +} + +static inline int __down_read_killable(struct rw_semaphore *sem) +{ + if (IS_ERR(____down_read(sem, "call_rwsem_down_read_failed_killable"))) + return -EINTR; + return 0; } /* @@ -82,17 +97,18 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) { long result, tmp; asm volatile("# beginning __down_read_trylock\n\t" - " mov %0,%1\n\t" + " mov %[count],%[result]\n\t" "1:\n\t" - " mov %1,%2\n\t" - " add %3,%2\n\t" + " mov %[result],%[tmp]\n\t" + " add %[inc],%[tmp]\n\t" " jle 2f\n\t" - LOCK_PREFIX " cmpxchg %2,%0\n\t" + LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t" " jnz 1b\n\t" "2:\n\t" "# ending __down_read_trylock\n\t" - : "+m" (sem->count), "=&a" (result), "=&r" (tmp) - : "i" (RWSEM_ACTIVE_READ_BIAS) + : [count] "+m" (sem->count), [result] "=&a" (result), + [tmp] "=&r" (tmp) + : [inc] "i" (RWSEM_ACTIVE_READ_BIAS) : "memory", "cc"); return result >= 0; } @@ -106,7 +122,7 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) struct rw_semaphore* ret; \ \ asm volatile("# beginning down_write\n\t" \ - LOCK_PREFIX " xadd %1,(%4)\n\t" \ + LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \ /* adds 0xffff0001, returns the old value */ \ " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ /* was the active mask 0 before? */\ @@ -114,9 +130,9 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) " call " slow_path "\n" \ "1:\n" \ "# ending down_write" \ - : "+m" (sem->count), "=d" (tmp), \ + : "+m" (sem->count), [tmp] "=d" (tmp), \ "=a" (ret), ASM_CALL_CONSTRAINT \ - : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \ + : [sem] "a" (sem), "[tmp]" (RWSEM_ACTIVE_WRITE_BIAS) \ : "memory", "cc"); \ ret; \ }) @@ -142,21 +158,21 @@ static inline bool __down_write_trylock(struct rw_semaphore *sem) bool result; long tmp0, tmp1; asm volatile("# beginning __down_write_trylock\n\t" - " mov %0,%1\n\t" + " mov %[count],%[tmp0]\n\t" "1:\n\t" " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" /* was the active mask 0 before? 
*/ " jnz 2f\n\t" - " mov %1,%2\n\t" - " add %4,%2\n\t" - LOCK_PREFIX " cmpxchg %2,%0\n\t" + " mov %[tmp0],%[tmp1]\n\t" + " add %[inc],%[tmp1]\n\t" + LOCK_PREFIX " cmpxchg %[tmp1],%[count]\n\t" " jnz 1b\n\t" "2:\n\t" CC_SET(e) "# ending __down_write_trylock\n\t" - : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1), - CC_OUT(e) (result) - : "er" (RWSEM_ACTIVE_WRITE_BIAS) + : [count] "+m" (sem->count), [tmp0] "=&a" (tmp0), + [tmp1] "=&r" (tmp1), CC_OUT(e) (result) + : [inc] "er" (RWSEM_ACTIVE_WRITE_BIAS) : "memory"); return result; } @@ -168,14 +184,14 @@ static inline void __up_read(struct rw_semaphore *sem) { long tmp; asm volatile("# beginning __up_read\n\t" - LOCK_PREFIX " xadd %1,(%2)\n\t" + LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" /* subtracts 1, returns the old value */ " jns 1f\n\t" " call call_rwsem_wake\n" /* expects old value in %edx */ "1:\n" "# ending __up_read\n" - : "+m" (sem->count), "=d" (tmp) - : "a" (sem), "1" (-RWSEM_ACTIVE_READ_BIAS) + : "+m" (sem->count), [tmp] "=d" (tmp) + : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_READ_BIAS) : "memory", "cc"); } @@ -186,14 +202,14 @@ static inline void __up_write(struct rw_semaphore *sem) { long tmp; asm volatile("# beginning __up_write\n\t" - LOCK_PREFIX " xadd %1,(%2)\n\t" + LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" /* subtracts 0xffff0001, returns the old value */ " jns 1f\n\t" " call call_rwsem_wake\n" /* expects old value in %edx */ "1:\n\t" "# ending __up_write\n" - : "+m" (sem->count), "=d" (tmp) - : "a" (sem), "1" (-RWSEM_ACTIVE_WRITE_BIAS) + : "+m" (sem->count), [tmp] "=d" (tmp) + : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_WRITE_BIAS) : "memory", "cc"); } @@ -203,7 +219,7 @@ static inline void __up_write(struct rw_semaphore *sem) static inline void __downgrade_write(struct rw_semaphore *sem) { asm volatile("# beginning __downgrade_write\n\t" - LOCK_PREFIX _ASM_ADD "%2,(%1)\n\t" + LOCK_PREFIX _ASM_ADD "%[inc],(%[sem])\n\t" /* * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386) * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64) @@ -213,7 +229,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) "1:\n\t" "# ending __downgrade_write\n" : "+m" (sem->count) - : "a" (sem), "er" (-RWSEM_WAITING_BIAS) + : [sem] "a" (sem), [inc] "er" (-RWSEM_WAITING_BIAS) : "memory", "cc"); } diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index b34625796eb2..5b6bc7016c22 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -42,11 +42,4 @@ #include <asm/qrwlock.h> -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* _ASM_X86_SPINLOCK_H */ diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 52250681f68c..fb856c9f0449 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -49,7 +49,7 @@ static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) unsigned ret; repeat: - ret = ACCESS_ONCE(s->seq); + ret = READ_ONCE(s->seq); if (unlikely(ret & 1)) { cpu_relax(); goto repeat; diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 236999c54edc..c60922a66385 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -22,7 +22,7 @@ obj-y += common.o obj-y += rdrand.o obj-y += match.o obj-y += bugs.o -obj-y += aperfmperf.o +obj-$(CONFIG_CPU_FREQ) += aperfmperf.o 
obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 957813e0180d..0ee83321a313 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -42,6 +42,10 @@ static void aperfmperf_snapshot_khz(void *dummy) s64 time_delta = ktime_ms_delta(now, s->time); unsigned long flags; + /* Don't bother re-computing within the cache threshold time. */ + if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS) + return; + local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); @@ -70,7 +74,6 @@ static void aperfmperf_snapshot_khz(void *dummy) unsigned int arch_freq_get_on_cpu(int cpu) { - s64 time_delta; unsigned int khz; if (!cpu_khz) @@ -79,12 +82,6 @@ unsigned int arch_freq_get_on_cpu(int cpu) if (!static_cpu_has(X86_FEATURE_APERFMPERF)) return 0; - /* Don't bother re-computing within the cache threshold time. */ - time_delta = ktime_ms_delta(ktime_get(), per_cpu(samples.time, cpu)); - khz = per_cpu(samples.khz, cpu); - if (khz && time_delta < APERFMPERF_CACHE_THRESHOLD_MS) - return khz; - smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); khz = per_cpu(samples.khz, cpu); if (khz) diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 87cc9ab7a13c..4ca632a06e0b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -204,7 +204,7 @@ static int error_context(struct mce *m) return IN_KERNEL; } -static int mce_severity_amd_smca(struct mce *m, int err_ctx) +static int mce_severity_amd_smca(struct mce *m, enum context err_ctx) { u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank); u32 low, high; @@ -245,6 +245,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc if (m->status & MCI_STATUS_UC) { + if (ctx == IN_KERNEL) + return MCE_PANIC_SEVERITY; + /* * On older systems where overflow_recov flag is not present, we * should simply panic if an error overflow occurs. 
If @@ -255,10 +258,6 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc if (mce_flags.smca) return mce_severity_amd_smca(m, ctx); - /* software can try to contain */ - if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL)) - return MCE_PANIC_SEVERITY; - /* kill current process */ return MCE_AR_SEVERITY; } else { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3b413065c613..b1d616d08eee 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1367,13 +1367,12 @@ static void __start_timer(struct timer_list *t, unsigned long interval) local_irq_restore(flags); } -static void mce_timer_fn(unsigned long data) +static void mce_timer_fn(struct timer_list *t) { - struct timer_list *t = this_cpu_ptr(&mce_timer); - int cpu = smp_processor_id(); + struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); unsigned long iv; - WARN_ON(cpu != data); + WARN_ON(cpu_t != t); iv = __this_cpu_read(mce_next_interval); @@ -1763,17 +1762,15 @@ static void mce_start_timer(struct timer_list *t) static void __mcheck_cpu_setup_timer(void) { struct timer_list *t = this_cpu_ptr(&mce_timer); - unsigned int cpu = smp_processor_id(); - setup_pinned_timer(t, mce_timer_fn, cpu); + timer_setup(t, mce_timer_fn, TIMER_PINNED); } static void __mcheck_cpu_init_timer(void) { struct timer_list *t = this_cpu_ptr(&mce_timer); - unsigned int cpu = smp_processor_id(); - setup_pinned_timer(t, mce_timer_fn, cpu); + timer_setup(t, mce_timer_fn, TIMER_PINNED); mce_start_timer(t); } diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 4378a729b933..6b7e17bf0b71 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -78,11 +78,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "microcode\t: 0x%x\n", c->microcode); if (cpu_has(c, X86_FEATURE_TSC)) { - unsigned int freq = arch_freq_get_on_cpu(cpu); + unsigned int freq = cpufreq_quick_get(cpu); if (!freq) - freq = cpufreq_quick_get(cpu); - if (!freq) freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", freq / 1000, (freq % 1000)); diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 9c4e7ba6870c..7d7715dde901 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -155,14 +155,14 @@ void init_espfix_ap(int cpu) page = cpu/ESPFIX_STACKS_PER_PAGE; /* Did another CPU already set this up? */ - stack_page = ACCESS_ONCE(espfix_pages[page]); + stack_page = READ_ONCE(espfix_pages[page]); if (likely(stack_page)) goto done; mutex_lock(&espfix_init_mutex); /* Did we race on the lock? 
*/ - stack_page = ACCESS_ONCE(espfix_pages[page]); + stack_page = READ_ONCE(espfix_pages[page]); if (stack_page) goto unlock_done; @@ -200,7 +200,7 @@ void init_espfix_ap(int cpu) set_pte(&pte_p[n*PTE_STRIDE], pte); /* Job is done for this CPU and any CPU which shares this page */ - ACCESS_ONCE(espfix_pages[page]) = stack_page; + WRITE_ONCE(espfix_pages[page], stack_page); unlock_done: mutex_unlock(&espfix_init_mutex); diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 6107ee1cb8d5..014cb2fc47ff 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -92,8 +92,6 @@ static const __initdata struct idt_data def_idts[] = { INTG(X86_TRAP_DF, double_fault), #endif INTG(X86_TRAP_DB, debug), - INTG(X86_TRAP_NMI, nmi), - INTG(X86_TRAP_BP, int3), #ifdef CONFIG_X86_MCE INTG(X86_TRAP_MC, &machine_check), diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index 615105cf7d58..ae38dccf0c8f 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -85,11 +85,11 @@ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, * Copy an instruction and adjust the displacement if the instruction * uses the %rip-relative addressing mode. */ -extern int __copy_instruction(u8 *dest, u8 *src, struct insn *insn); +extern int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn); /* Generate a relative-jump/call instruction */ -extern void synthesize_reljump(void *from, void *to); -extern void synthesize_relcall(void *from, void *to); +extern void synthesize_reljump(void *dest, void *from, void *to); +extern void synthesize_relcall(void *dest, void *from, void *to); #ifdef CONFIG_OPTPROBES extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 0742491cbb73..bd36f3c33cd0 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -119,29 +119,29 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); static nokprobe_inline void -__synthesize_relative_insn(void *from, void *to, u8 op) +__synthesize_relative_insn(void *dest, void *from, void *to, u8 op) { struct __arch_relative_insn { u8 op; s32 raddr; } __packed *insn; - insn = (struct __arch_relative_insn *)from; + insn = (struct __arch_relative_insn *)dest; insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); insn->op = op; } /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ -void synthesize_reljump(void *from, void *to) +void synthesize_reljump(void *dest, void *from, void *to) { - __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); + __synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE); } NOKPROBE_SYMBOL(synthesize_reljump); /* Insert a call instruction at address 'from', which calls address 'to'.*/ -void synthesize_relcall(void *from, void *to) +void synthesize_relcall(void *dest, void *from, void *to) { - __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); + __synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE); } NOKPROBE_SYMBOL(synthesize_relcall); @@ -346,10 +346,11 @@ static int is_IF_modifier(kprobe_opcode_t *insn) /* * Copy an instruction with recovering modified instruction by kprobes * and adjust the displacement if the instruction uses the %rip-relative - * addressing mode. + * addressing mode. 
Note that since @real will be the final place of copied + * instruction, displacement must be adjust by @real, not @dest. * This returns the length of copied instruction, or 0 if it has an error. */ -int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) +int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) { kprobe_opcode_t buf[MAX_INSN_SIZE]; unsigned long recovered_insn = @@ -387,11 +388,11 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) * have given. */ newdisp = (u8 *) src + (s64) insn->displacement.value - - (u8 *) dest; + - (u8 *) real; if ((s64) (s32) newdisp != newdisp) { pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp); pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", - src, dest, insn->displacement.value); + src, real, insn->displacement.value); return 0; } disp = (u8 *) dest + insn_offset_displacement(insn); @@ -402,20 +403,38 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) } /* Prepare reljump right after instruction to boost */ -static void prepare_boost(struct kprobe *p, struct insn *insn) +static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p, + struct insn *insn) { + int len = insn->length; + if (can_boost(insn, p->addr) && - MAX_INSN_SIZE - insn->length >= RELATIVEJUMP_SIZE) { + MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) { /* * These instructions can be executed directly if it * jumps back to correct address. */ - synthesize_reljump(p->ainsn.insn + insn->length, + synthesize_reljump(buf + len, p->ainsn.insn + len, p->addr + insn->length); + len += RELATIVEJUMP_SIZE; p->ainsn.boostable = true; } else { p->ainsn.boostable = false; } + + return len; +} + +/* Make page to RO mode when allocate it */ +void *alloc_insn_page(void) +{ + void *page; + + page = module_alloc(PAGE_SIZE); + if (page) + set_memory_ro((unsigned long)page & PAGE_MASK, 1); + + return page; } /* Recover page to RW mode before releasing it */ @@ -429,12 +448,11 @@ void free_insn_page(void *page) static int arch_copy_kprobe(struct kprobe *p) { struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; int len; - set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1); - /* Copy an instruction with recovering if other optprobe modifies it.*/ - len = __copy_instruction(p->ainsn.insn, p->addr, &insn); + len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn); if (!len) return -EINVAL; @@ -442,15 +460,16 @@ static int arch_copy_kprobe(struct kprobe *p) * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check. 
*/ - prepare_boost(p, &insn); - - set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1); + len = prepare_boost(buf, p, &insn); /* Check whether the instruction modifies Interrupt Flag or not */ - p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn); + p->ainsn.if_modifier = is_IF_modifier(buf); /* Also, displacement change doesn't affect the first byte */ - p->opcode = p->ainsn.insn[0]; + p->opcode = buf[0]; + + /* OK, write back the instruction(s) into ROX insn buffer */ + text_poke(p->ainsn.insn, buf, len); return 0; } diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 041f7b6dfa0f..8dc0161cec8f 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -26,7 +26,7 @@ #include "common.h" static nokprobe_inline -int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, +void __skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, unsigned long orig_ip) { /* @@ -41,33 +41,31 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, __this_cpu_write(current_kprobe, NULL); if (orig_ip) regs->ip = orig_ip; - return 1; } int skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - if (kprobe_ftrace(p)) - return __skip_singlestep(p, regs, kcb, 0); - else - return 0; + if (kprobe_ftrace(p)) { + __skip_singlestep(p, regs, kcb, 0); + preempt_enable_no_resched(); + return 1; + } + return 0; } NOKPROBE_SYMBOL(skip_singlestep); -/* Ftrace callback handler for kprobes */ +/* Ftrace callback handler for kprobes -- called under preepmt disabed */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe *p; struct kprobe_ctlblk *kcb; - unsigned long flags; - - /* Disable irq for emulating a breakpoint and avoiding preempt */ - local_irq_save(flags); + /* Preempt is disabled by ftrace */ p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) - goto end; + return; kcb = get_kprobe_ctlblk(); if (kprobe_running()) { @@ -77,17 +75,19 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ regs->ip = ip + sizeof(kprobe_opcode_t); + /* To emulate trap based kprobes, preempt_disable here */ + preempt_disable(); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) + if (!p->pre_handler || !p->pre_handler(p, regs)) { __skip_singlestep(p, regs, kcb, orig_ip); + preempt_enable_no_resched(); + } /* * If pre_handler returns !0, it sets regs->ip and - * resets current kprobe. + * resets current kprobe, and keep preempt count +1. 
*/ } -end: - local_irq_restore(flags); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 4f98aad38237..e941136e24d8 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -142,11 +142,11 @@ void optprobe_template_func(void); STACK_FRAME_NON_STANDARD(optprobe_template_func); #define TMPL_MOVE_IDX \ - ((long)&optprobe_template_val - (long)&optprobe_template_entry) + ((long)optprobe_template_val - (long)optprobe_template_entry) #define TMPL_CALL_IDX \ - ((long)&optprobe_template_call - (long)&optprobe_template_entry) + ((long)optprobe_template_call - (long)optprobe_template_entry) #define TMPL_END_IDX \ - ((long)&optprobe_template_end - (long)&optprobe_template_entry) + ((long)optprobe_template_end - (long)optprobe_template_entry) #define INT3_SIZE sizeof(kprobe_opcode_t) @@ -154,17 +154,15 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func); static void optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long flags; - /* This is possible if op is under delayed unoptimizing */ if (kprobe_disabled(&op->kp)) return; - local_irq_save(flags); + preempt_disable(); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); } else { + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); /* Save skipped registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; @@ -180,17 +178,17 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) opt_pre_handler(&op->kp, regs); __this_cpu_write(current_kprobe, NULL); } - local_irq_restore(flags); + preempt_enable_no_resched(); } NOKPROBE_SYMBOL(optimized_callback); -static int copy_optimized_instructions(u8 *dest, u8 *src) +static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) { struct insn insn; int len = 0, ret; while (len < RELATIVEJUMP_SIZE) { - ret = __copy_instruction(dest + len, src + len, &insn); + ret = __copy_instruction(dest + len, src + len, real, &insn); if (!ret || !can_boost(&insn, src + len)) return -EINVAL; len += ret; @@ -343,57 +341,66 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *__unused) { - u8 *buf; - int ret; + u8 *buf = NULL, *slot; + int ret, len; long rel; if (!can_optimize((unsigned long)op->kp.addr)) return -EILSEQ; - op->optinsn.insn = get_optinsn_slot(); - if (!op->optinsn.insn) + buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL); + if (!buf) return -ENOMEM; + op->optinsn.insn = slot = get_optinsn_slot(); + if (!slot) { + ret = -ENOMEM; + goto out; + } + /* * Verify if the address gap is in 2GB range, because this uses * a relative jump. 
*/ - rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; + rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE; if (abs(rel) > 0x7fffffff) { - __arch_remove_optimized_kprobe(op, 0); - return -ERANGE; + ret = -ERANGE; + goto err; } - buf = (u8 *)op->optinsn.insn; - set_memory_rw((unsigned long)buf & PAGE_MASK, 1); + /* Copy arch-dep-instance from template */ + memcpy(buf, optprobe_template_entry, TMPL_END_IDX); /* Copy instructions into the out-of-line buffer */ - ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); - if (ret < 0) { - __arch_remove_optimized_kprobe(op, 0); - return ret; - } + ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr, + slot + TMPL_END_IDX); + if (ret < 0) + goto err; op->optinsn.size = ret; - - /* Copy arch-dep-instance from template */ - memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); + len = TMPL_END_IDX + op->optinsn.size; /* Set probe information */ synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); /* Set probe function call */ - synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); + synthesize_relcall(buf + TMPL_CALL_IDX, + slot + TMPL_CALL_IDX, optimized_callback); /* Set returning jmp instruction at the tail of out-of-line buffer */ - synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, + synthesize_reljump(buf + len, slot + len, (u8 *)op->kp.addr + op->optinsn.size); - - set_memory_ro((unsigned long)buf & PAGE_MASK, 1); - - flush_icache_range((unsigned long) buf, - (unsigned long) buf + TMPL_END_IDX + - op->optinsn.size + RELATIVEJUMP_SIZE); - return 0; + len += RELATIVEJUMP_SIZE; + + /* We have to use text_poke for instuction buffer because it is RO */ + text_poke(slot, buf, len); + ret = 0; +out: + kfree(buf); + return ret; + +err: + __arch_remove_optimized_kprobe(op, 0); + goto out; } /* diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 4d17bacf4030..a2fcf037bd80 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -102,7 +102,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt) static void install_ldt(struct mm_struct *current_mm, struct ldt_struct *ldt) { - /* Synchronizes with lockless_dereference in load_mm_ldt. */ + /* Synchronizes with READ_ONCE in load_mm_ldt. */ smp_store_release(¤t_mm->context.ldt, ldt); /* Activate the LDT for all CPUs using current_mm. */ diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 35aafc95e4b8..18bc9b51ac9b 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -105,7 +105,7 @@ static void nmi_max_handler(struct irq_work *w) { struct nmiaction *a = container_of(w, struct nmiaction, irq_work); int remainder_ns, decimal_msecs; - u64 whole_msecs = ACCESS_ONCE(a->max_duration); + u64 whole_msecs = READ_ONCE(a->max_duration); remainder_ns = do_div(whole_msecs, (1000 * 1000)); decimal_msecs = remainder_ns / 1000; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 19a3e8f961c7..041096bdef86 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -115,8 +115,18 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, return 5; } -/* Neat trick to map patch type back to the call within the - * corresponding structure. */ +DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); + +void __init native_pv_lock_init(void) +{ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + static_branch_disable(&virt_spin_lock_key); +} + +/* + * Neat trick to map patch type back to the call within the + * corresponding structure. 
+ */ static void *get_call_destination(u8 type) { struct paravirt_patch_template tmpl = { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ad59edd84de7..7ab6db86b573 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -77,6 +77,7 @@ #include <asm/i8259.h> #include <asm/realmode.h> #include <asm/misc.h> +#include <asm/qspinlock.h> /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -194,6 +195,12 @@ static void smp_callin(void) smp_store_cpu_info(cpuid); /* + * The topology information must be up to date before + * calibrate_delay() and notify_cpu_starting(). + */ + set_cpu_sibling_map(raw_smp_processor_id()); + + /* * Get our bogomips. * Update loops_per_jiffy in cpu_data. Previous call to * smp_store_cpu_info() stored a value that is close but not as @@ -203,11 +210,6 @@ static void smp_callin(void) cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; pr_debug("Stack at about %p\n", &cpuid); - /* - * This must be done before setting cpu_online_mask - * or calling notify_cpu_starting. - */ - set_cpu_sibling_map(raw_smp_processor_id()); wmb(); notify_cpu_starting(cpuid); @@ -1094,7 +1096,7 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) unsigned long flags; int err, ret = 0; - WARN_ON(irqs_disabled()); + lockdep_assert_irqs_enabled(); pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); @@ -1357,6 +1359,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) pr_info("CPU0: "); print_cpu_info(&cpu_data(0)); + native_pv_lock_init(); + uv_system_init(); set_mtrr_aps_delayed_init(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 67db4f43309e..5a6b8f809792 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -209,9 +209,6 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, if (fixup_exception(regs, trapnr)) return 0; - if (fixup_bug(regs, trapnr)) - return 0; - tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; die(str, regs, error_code); @@ -292,6 +289,13 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); + /* + * WARN*()s end up here; fix them up before we call the + * notifier chain. 
+ */ + if (!user_mode(regs) && fixup_bug(regs, trapnr)) + return; + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 796d96bb0821..ad2b925a808e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1346,12 +1346,10 @@ void __init tsc_init(void) unsigned long calibrate_delay_is_known(void) { int sibling, cpu = smp_processor_id(); - struct cpumask *mask = topology_core_cpumask(cpu); + int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC); + const struct cpumask *mask = topology_core_cpumask(cpu); - if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) - return 0; - - if (!mask) + if (tsc_disabled || !constant_tsc || !mask) return 0; sibling = cpumask_any_but(mask, cpu); diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index b95007e7c1b3..a3f973b2c97a 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -279,7 +279,7 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr, if (!stack_access_ok(state, addr, sizeof(long))) return false; - *val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr); + *val = READ_ONCE_NOCHECK(*(unsigned long *)addr); return true; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7a69cf053711..a119b361b8b7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -443,7 +443,7 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte) static u64 __get_spte_lockless(u64 *sptep) { - return ACCESS_ONCE(*sptep); + return READ_ONCE(*sptep); } #else union split_spte { @@ -4819,7 +4819,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, * If we don't have indirect shadow pages, it means no page is * write-protected, so we can exit simply. */ - if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) + if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) return; remote_flush = local_flush = false; diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index ea67dc876316..01c1371f39f8 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -157,7 +157,7 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, return false; index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); - return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); + return !!READ_ONCE(slot->arch.gfn_track[mode][index]); } void kvm_page_track_cleanup(struct kvm *kvm) diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S index bf2c6074efd2..dc2ab6ea6768 100644 --- a/arch/x86/lib/rwsem.S +++ b/arch/x86/lib/rwsem.S @@ -98,6 +98,18 @@ ENTRY(call_rwsem_down_read_failed) ret ENDPROC(call_rwsem_down_read_failed) +ENTRY(call_rwsem_down_read_failed_killable) + FRAME_BEGIN + save_common_regs + __ASM_SIZE(push,) %__ASM_REG(dx) + movq %rax,%rdi + call rwsem_down_read_failed_killable + __ASM_SIZE(pop,) %__ASM_REG(dx) + restore_common_regs + FRAME_END + ret +ENDPROC(call_rwsem_down_read_failed_killable) + ENTRY(call_rwsem_down_write_failed) FRAME_BEGIN save_common_regs diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index c3521e2be396..3321b446b66c 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -67,12 +67,17 @@ bool ex_handler_refcount(const struct exception_table_entry *fixup, * wrapped around) will be set. Additionally, seeing the refcount * reach 0 will set ZF (Zero Flag: result was zero). 
In each of * these cases we want a report, since it's a boundary condition. - * + * The SF case is not reported since it indicates post-boundary + * manipulations below zero or above INT_MAX. And if none of the + * flags are set, something has gone very wrong, so report it. */ if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) { bool zero = regs->flags & X86_EFLAGS_ZF; refcount_error_report(regs, zero ? "hit zero" : "overflow"); + } else if ((regs->flags & X86_EFLAGS_SF) == 0) { + /* Report if none of OF, ZF, nor SF are set. */ + refcount_error_report(regs, "unexpected saturation"); } return true; diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 16c5f37933a2..0286327e65fa 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -40,7 +40,7 @@ static char sme_cmdline_off[] __initdata = "off"; * section is later cleared. */ u64 sme_me_mask __section(.data) = 0; -EXPORT_SYMBOL_GPL(sme_me_mask); +EXPORT_SYMBOL(sme_me_mask); /* Buffer used for early in-place encryption by BSP, no locking needed */ static char sme_early_buffer[PAGE_SIZE] __aligned(PAGE_SIZE); diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 350f7096baac..7913b6921959 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -212,8 +212,8 @@ static void arch_perfmon_setup_counters(void) eax.full = cpuid_eax(0xa); /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */ - if (eax.split.version_id == 0 && __this_cpu_read(cpu_info.x86) == 6 && - __this_cpu_read(cpu_info.x86_model) == 15) { + if (eax.split.version_id == 0 && boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model == 15) { eax.split.version_id = 2; eax.split.num_counters = 2; eax.split.bit_width = 40; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 6083ba462f35..13b4f19b9131 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -547,7 +547,7 @@ int xen_alloc_p2m_entry(unsigned long pfn) if (p2m_top_mfn && pfn < MAX_P2M_PFN) { topidx = p2m_top_index(pfn); top_mfn_p = &p2m_top_mfn[topidx]; - mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); + mid_mfn = READ_ONCE(p2m_top_mfn_p[topidx]); BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 08324c64005d..02f3445a2b5f 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -11,6 +11,7 @@ #include <linux/slab.h> #include <asm/paravirt.h> +#include <asm/qspinlock.h> #include <xen/interface/xen.h> #include <xen/events.h> @@ -81,8 +82,11 @@ void xen_init_lock_cpu(int cpu) int irq; char *name; - if (!xen_pvspin) + if (!xen_pvspin) { + if (cpu == 0) + static_branch_disable(&virt_spin_lock_key); return; + } WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n", cpu, per_cpu(lock_kicker_irq, cpu)); |
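
Illustrative notes follow; they are not part of the patch, and every example_* identifier below is hypothetical.

The ACCESS_ONCE() -> READ_ONCE()/WRITE_ONCE() conversions (entry/common.c, vclock_gettime.c, espfix_64.c, nmi.c, KVM, Xen p2m) are mechanical, but the pattern they serve is worth spelling out; a minimal sketch, assuming a single writer:

#include <linux/compiler.h>

static unsigned long example_flags;     /* hypothetical shared flag word */

static bool example_work_pending(unsigned long mask)
{
        /*
         * READ_ONCE() forces exactly one, untorn load and keeps the
         * compiler from re-reading the variable later -- the direct
         * replacement for the removed ACCESS_ONCE() uses.
         */
        return (READ_ONCE(example_flags) & mask) != 0;
}

static void example_set_work(unsigned long mask)
{
        /* Single writer assumed; concurrent writers would need an atomic op. */
        WRITE_ONCE(example_flags, READ_ONCE(example_flags) | mask);
}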
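
The lockless_dereference() -> READ_ONCE() changes (mmu_context.h, events/core.c, ldt.c) keep the existing release/consume pairing, since READ_ONCE() is now sufficient for address-dependent loads. A sketch of that pattern with hypothetical names:

#include <linux/compiler.h>
#include <asm/barrier.h>

struct example_ldt {
        int nr_entries;
        /* ... descriptor entries ... */
};

static struct example_ldt *example_ldt_ptr;     /* shared pointer */

static void example_install_ldt(struct example_ldt *new_ldt)
{
        /* Orders every store that initialised *new_ldt before the pointer store. */
        smp_store_release(&example_ldt_ptr, new_ldt);
}

static int example_ldt_entries(void)
{
        /* Address-dependent load; pairs with the smp_store_release() above. */
        struct example_ldt *ldt = READ_ONCE(example_ldt_ptr);

        return ldt ? ldt->nr_entries : 0;
}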
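
prepare_exit_to_usermode() switches to lockdep_assert_irqs_disabled(), and native_cpu_up() to lockdep_assert_irqs_enabled(), instead of open-coded irqs_disabled() checks. A sketch of how such an assertion is placed (example_* is made up):

#include <linux/lockdep.h>

static void example_exit_to_user(void)
{
        /*
         * With lockdep disabled this compiles away; with it enabled it
         * checks lockdep's tracked IRQ state rather than re-reading the
         * hardware flag as the old WARN_ON(!irqs_disabled()) did.
         */
        lockdep_assert_irqs_disabled();

        /* ... work that must run with interrupts off ... */
}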
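
The intel/core.c and perf_event.h hunks allow PERF_SAMPLE_REGS_USER (and REGS_INTR) in the large/"free running" PEBS buffer when the event is ring-3 only and the requested register set stays within PEBS_REGS. A userspace sketch of the kind of attribute this targets -- field names come from the perf uapi headers, values are illustrative, and whether large PEBS is actually used still depends on the driver's checks above:

#include <string.h>
#include <linux/perf_event.h>
#include <asm/perf_regs.h>

static void example_fill_attr(struct perf_event_attr *attr)
{
        memset(attr, 0, sizeof(*attr));
        attr->size           = sizeof(*attr);
        attr->type           = PERF_TYPE_HARDWARE;
        attr->config         = PERF_COUNT_HW_CPU_CYCLES;
        attr->sample_period  = 100003;
        attr->precise_ip     = 3;               /* request PEBS sampling */
        attr->exclude_kernel = 1;               /* ring 3 only */
        attr->sample_type    = PERF_SAMPLE_IP | PERF_SAMPLE_TID |
                               PERF_SAMPLE_REGS_USER;
        /* A small set of general-purpose user registers. */
        attr->sample_regs_user = (1ULL << PERF_REG_X86_IP) |
                                 (1ULL << PERF_REG_X86_SP) |
                                 (1ULL << PERF_REG_X86_BP);
}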
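
qspinlock.h, paravirt.c, smpboot.c and xen/spinlock.c replace the static_cpu_has(X86_FEATURE_HYPERVISOR) test in virt_spin_lock() with virt_spin_lock_key, a static key that defaults to true. A sketch of the expected guest-side use (the function and flag are hypothetical):

#include <linux/init.h>
#include <asm/qspinlock.h>

static void __init example_guest_lock_init(bool want_queued_slowpath)
{
        /*
         * Left enabled, the key keeps the simple test-and-set fallback in
         * virt_spin_lock().  Bare metal (native_pv_lock_init()) and guests
         * that prefer the queued/paravirt slow path disable it, so the
         * branch is patched out.
         */
        if (want_queued_slowpath)
                static_branch_disable(&virt_spin_lock_key);
}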
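
The rwsem.h inline assembly moves to named operands so constraints can be added or reordered without renumbering %0/%1 references. The same syntax in a standalone, illustrative helper (not the kernel's code):

static inline long example_xadd(long *counter, long inc)
{
        asm volatile("lock; xadd %[inc], %[cnt]"
                     : [cnt] "+m" (*counter), [inc] "+r" (inc)
                     :
                     : "memory", "cc");
        return inc;     /* xadd leaves the old value of *counter in 'inc' */
}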
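
mce.c is converted to the new timer API: timer_setup() replaces setup_pinned_timer(), pinning is requested with TIMER_PINNED, and the callback receives the timer_list pointer instead of an 'unsigned long data' cookie. A minimal sketch with hypothetical names:

#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/timer.h>

static DEFINE_PER_CPU(struct timer_list, example_timer);

static void example_timer_fn(struct timer_list *t)
{
        /* The timer pointer itself identifies the per-CPU instance. */
        WARN_ON(t != this_cpu_ptr(&example_timer));

        /* ... periodic per-CPU work ... */

        mod_timer(t, jiffies + HZ);
}

static void example_start_timer(void)
{
        struct timer_list *t = this_cpu_ptr(&example_timer);

        timer_setup(t, example_timer_fn, TIMER_PINNED);
        mod_timer(t, jiffies + HZ);
}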
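
With ARCH_HAS_REFCOUNT selected again (Kconfig), the exception text moved to .text..refcount, and the extended extable handler, refcount_t over- and underflows on x86 are caught by the UD0-based fast path. A short sketch of the API being protected (example_obj is hypothetical):

#include <linux/refcount.h>
#include <linux/slab.h>

struct example_obj {
        refcount_t ref;
        /* ... payload ... */
};

static struct example_obj *example_get(struct example_obj *obj)
{
        /* An increment that would pass the saturation point is reported. */
        refcount_inc(&obj->ref);
        return obj;
}

static void example_put(struct example_obj *obj)
{
        /*
         * Dropping to zero here is the normal free path; a later, extra
         * put (underflow) is what the exception handler reports.
         */
        if (refcount_dec_and_test(&obj->ref))
                kfree(obj);
}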
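
The kprobes rework stops toggling the instruction slots between RO and RW: instructions (plus the optional boost jump) are assembled in a scratch buffer, displacements are fixed up against the final slot address, and the result is written with text_poke(). A much simplified sketch of that flow (not the actual kprobes helpers; callers are assumed to hold text_mutex):

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <asm/text-patching.h>

static int example_install_insn(u8 *ro_slot, const u8 *insn, int len)
{
        u8 buf[64];             /* scratch copy, built with ordinary stores */

        if (len <= 0 || len > (int)sizeof(buf))
                return -EINVAL;

        memcpy(buf, insn, len);
        /* ... adjust any RIP-relative displacement against ro_slot, not buf ... */

        /* The slot stays read-only; text_poke() writes through a temporary mapping. */
        text_poke(ro_slot, buf, len);
        return 0;
}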