Diffstat (limited to 'arch/x86/mm')
-rw-r--r--  arch/x86/mm/fault.c        |  2
-rw-r--r--  arch/x86/mm/init.c         |  4
-rw-r--r--  arch/x86/mm/mem_encrypt.c  | 24
-rw-r--r--  arch/x86/mm/pageattr.c     | 25
-rw-r--r--  arch/x86/mm/pgtable.c      | 27
-rw-r--r--  arch/x86/mm/pti.c          |  2
-rw-r--r--  arch/x86/mm/tlb.c          |  7
7 files changed, 82 insertions(+), 9 deletions(-)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b9123c497e0a..47bebfe6efa7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -837,7 +837,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
 
 	printk(KERN_CONT "\n");
 
-	show_opcodes((u8 *)regs->ip, loglvl);
+	show_opcodes(regs, loglvl);
 }
 
 static void
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26c1115..faca978ebf9d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end)
 	set_memory_np_noalias(begin_ul, len_pages);
 }
 
+void __weak mem_encrypt_free_decrypted_mem(void) { }
+
 void __ref free_initmem(void)
 {
 	e820__reallocate_tables();
 
+	mem_encrypt_free_decrypted_mem();
+
 	free_kernel_image_pages(&__init_begin, &__init_end);
 }
 
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398d1fd3..006f373f54ab 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,30 @@ bool sev_active(void)
 EXPORT_SYMBOL(sev_active);
 
 /* Architecture __weak replacement functions */
+void __init mem_encrypt_free_decrypted_mem(void)
+{
+	unsigned long vaddr, vaddr_end, npages;
+	int r;
+
+	vaddr = (unsigned long)__start_bss_decrypted_unused;
+	vaddr_end = (unsigned long)__end_bss_decrypted;
+	npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
+
+	/*
+	 * The unused memory range was mapped decrypted, change the encryption
+	 * attribute from decrypted to encrypted before freeing it.
+	 */
+	if (mem_encrypt_active()) {
+		r = set_memory_encrypted(vaddr, npages);
+		if (r) {
+			pr_warn("failed to free unused decrypted pages\n");
+			return;
+		}
+	}
+
+	free_init_pages("unused decrypted", vaddr, vaddr_end);
+}
+
 void __init mem_encrypt_init(void)
 {
 	if (!sme_me_mask)
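Note on the init.c/mem_encrypt.c pair: it relies on weak linkage. init.c supplies a __weak no-op stub so free_initmem() can call mem_encrypt_free_decrypted_mem() unconditionally, and the strong definition in mem_encrypt.c overrides the stub at link time when memory encryption support is built in. Below is a minimal user-space sketch of that pattern; the file and function names (core.c, override.c, mem_cleanup_hook) are hypothetical, and the attribute syntax is GCC/Clang-specific.

/* core.c: mirrors init.c -- call the hook unconditionally and provide
 * a weak default so the program links even without an override. */
#include <stdio.h>

/* Weak default: the linker keeps this only if no strong definition of
 * the same symbol exists anywhere else in the program. */
__attribute__((weak)) void mem_cleanup_hook(void)
{
	puts("weak default: nothing to free");
}

int main(void)
{
	mem_cleanup_hook();	/* safe even when no override is linked in */
	return 0;
}

/* override.c: mirrors mem_encrypt.c -- an optional strong definition:
 *
 *	#include <stdio.h>
 *	void mem_cleanup_hook(void)
 *	{
 *		puts("strong override: freeing unused decrypted pages");
 *	}
 *
 * cc core.c             -> prints the weak default
 * cc core.c override.c  -> prints the strong override
 */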
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8d6c34fe49be..51a5a69ecac9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -1420,6 +1420,29 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 	return 0;
 }
 
+/*
+ * Machine check recovery code needs to change cache mode of poisoned
+ * pages to UC to avoid speculative access logging another error. But
+ * passing the address of the 1:1 mapping to set_memory_uc() is a fine
+ * way to encourage a speculative access. So we cheat and flip the top
+ * bit of the address. This works fine for the code that updates the
+ * page tables. But at the end of the process we need to flush the cache
+ * and the non-canonical address causes a #GP fault when used by the
+ * CLFLUSH instruction.
+ *
+ * But in the common case we already have a canonical address. This code
+ * will fix the top bit if needed and is a no-op otherwise.
+ */
+static inline unsigned long make_addr_canonical_again(unsigned long addr)
+{
+#ifdef CONFIG_X86_64
+	return (long)(addr << 1) >> 1;
+#else
+	return addr;
+#endif
+}
+
+
 static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 				    pgprot_t mask_set, pgprot_t mask_clr,
 				    int force_split, int in_flag,
@@ -1465,7 +1488,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 		 * Save address for cache flush. *addr is modified in the call
 		 * to __change_page_attr_set_clr() below.
 		 */
-		baddr = *addr;
+		baddr = make_addr_canonical_again(*addr);
 	}
 
 	/* Must avoid aliasing mappings in the highmem code */
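Note on the pageattr.c hunk: in a canonical x86-64 address the top bits are all copies of each other, so bits 62 and 63 agree; shifting the address left by one (dropping the possibly-flipped bit 63) and arithmetic-shifting right by one re-extends bit 62 over bit 63, restoring canonicality while leaving already-canonical addresses untouched. A small user-space sketch of the same expression; it assumes GCC/Clang's arithmetic right shift of signed values, which is implementation-defined in ISO C.

/* canonical_demo.c -- demonstrate the sign-extension trick in
 * make_addr_canonical_again() on sample 64-bit addresses. */
#include <stdio.h>

static unsigned long make_addr_canonical_again(unsigned long addr)
{
	/* Same expression as the kernel's CONFIG_X86_64 branch. */
	return (long)(addr << 1) >> 1;
}

int main(void)
{
	/* A 1:1-mapping style kernel address with its top bit flipped,
	 * as the machine-check recovery path does. */
	unsigned long flipped   = 0x7fff888000000000UL;
	unsigned long canonical = 0xffff888000000000UL;
	unsigned long user      = 0x00007f1234560000UL;

	printf("%016lx -> %016lx (top bit restored)\n",
	       flipped, make_addr_canonical_again(flipped));
	printf("%016lx -> %016lx (no-op)\n",
	       canonical, make_addr_canonical_again(canonical));
	printf("%016lx -> %016lx (no-op)\n",
	       user, make_addr_canonical_again(user));
	return 0;
}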
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e848a4811785..59274e2c1ac4 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -115,6 +115,8 @@ static inline void pgd_list_del(pgd_t *pgd)
 
 #define UNSHARED_PTRS_PER_PGD				\
 	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
+#define MAX_UNSHARED_PTRS_PER_PGD			\
+	max_t(size_t, KERNEL_PGD_BOUNDARY, PTRS_PER_PGD)
 
 
 static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
@@ -181,6 +183,7 @@ static void pgd_dtor(pgd_t *pgd)
  * and initialize the kernel pmds here.
  */
 #define PREALLOCATED_PMDS	UNSHARED_PTRS_PER_PGD
+#define MAX_PREALLOCATED_PMDS	MAX_UNSHARED_PTRS_PER_PGD
 
 /*
  * We allocate separate PMDs for the kernel part of the user page-table
@@ -189,6 +192,7 @@ static void pgd_dtor(pgd_t *pgd)
  */
 #define PREALLOCATED_USER_PMDS	(static_cpu_has(X86_FEATURE_PTI) ? \
 					KERNEL_PGD_PTRS : 0)
+#define MAX_PREALLOCATED_USER_PMDS	KERNEL_PGD_PTRS
 
 void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 {
@@ -210,7 +214,9 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 
 /* No need to prepopulate any pagetable entries in non-PAE modes. */
 #define PREALLOCATED_PMDS	0
+#define MAX_PREALLOCATED_PMDS	0
 #define PREALLOCATED_USER_PMDS	0
+#define MAX_PREALLOCATED_USER_PMDS	0
 #endif	/* CONFIG_X86_PAE */
 
 static void free_pmds(struct mm_struct *mm, pmd_t *pmds[], int count)
@@ -269,7 +275,7 @@ static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp)
 	if (pgd_val(pgd) != 0) {
 		pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
 
-		*pgdp = native_make_pgd(0);
+		pgd_clear(pgdp);
 
 		paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
 		pmd_free(mm, pmd);
@@ -428,8 +434,8 @@ static inline void _pgd_free(pgd_t *pgd)
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd;
-	pmd_t *u_pmds[PREALLOCATED_USER_PMDS];
-	pmd_t *pmds[PREALLOCATED_PMDS];
+	pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
+	pmd_t *pmds[MAX_PREALLOCATED_PMDS];
 
 	pgd = _pgd_alloc();
 
@@ -494,7 +500,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 	int changed = !pte_same(*ptep, entry);
 
 	if (changed && dirty)
-		*ptep = entry;
+		set_pte(ptep, entry);
 
 	return changed;
 }
@@ -509,7 +515,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
 	if (changed && dirty) {
-		*pmdp = entry;
+		set_pmd(pmdp, entry);
 		/*
 		 * We had a write-protection fault here and changed the pmd
 		 * to to more permissive. No need to flush the TLB for that,
@@ -529,7 +535,7 @@ int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 	VM_BUG_ON(address & ~HPAGE_PUD_MASK);
 
 	if (changed && dirty) {
-		*pudp = entry;
+		set_pud(pudp, entry);
 		/*
 		 * We had a write-protection fault here and changed the pud
 		 * to to more permissive. No need to flush the TLB for that,
@@ -637,6 +643,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
 {
 	unsigned long address = __fix_to_virt(idx);
 
+#ifdef CONFIG_X86_64
+	/*
+	 * Ensure that the static initial page tables are covering the
+	 * fixmap completely.
+	 */
+	BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
+		     (FIXMAP_PMD_NUM * PTRS_PER_PTE));
+#endif
+
 	if (idx >= __end_of_fixed_addresses) {
 		BUG();
 		return;
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 31341ae7309f..c1fc1ae6b429 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -248,7 +248,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
  *
  * Returns a pointer to a PTE on success, or NULL on failure.
  */
-static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
+static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
 {
 	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
 	pmd_t *pmd;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 9517d1b2a281..e96b99eb800c 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -305,6 +305,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
 		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
 
+		/* Let nmi_uaccess_okay() know that we're changing CR3. */
+		this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
+		barrier();
+
 		if (need_flush) {
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
@@ -335,6 +339,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		if (next != &init_mm)
 			this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
 
+		/* Make sure we write CR3 before loaded_mm. */
+		barrier();
+
 		this_cpu_write(cpu_tlbstate.loaded_mm, next);
 		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
 	}
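Note on the tlb.c hunks: they close a race between CR3 switching and NMI handlers that touch user memory. loaded_mm is set to the LOADED_MM_SWITCHING sentinel before the CR3 write and to the real mm afterwards, with compiler barriers ordering the stores, so nmi_uaccess_okay() can detect an in-flight switch and refuse uaccess. A simplified sketch of that sentinel-plus-barrier pattern follows; the names are illustrative, not the kernel API, and a plain compiler barrier is enough only because the observer (an NMI) runs on the same CPU as the switch.

/* sentinel_demo.c -- sketch of publishing a "state in flux" sentinel
 * around an unsafe window, as switch_mm_irqs_off() does for CR3. */
#include <stdio.h>

struct mm;					/* stands in for struct mm_struct */

#define LOADED_MM_SWITCHING	((struct mm *)1UL)	/* sentinel, as in the kernel */
#define barrier()		asm volatile("" ::: "memory")

static struct mm *loaded_mm;	/* stands in for per-CPU cpu_tlbstate.loaded_mm */

/* The NMI-side check: user accesses are safe only when loaded_mm is stable. */
static int nmi_uaccess_okay_sketch(void)
{
	return loaded_mm != LOADED_MM_SWITCHING;
}

static void switch_mm_sketch(struct mm *next)
{
	/* Publish the sentinel before touching CR3 ... */
	loaded_mm = LOADED_MM_SWITCHING;
	barrier();

	/* ... the CR3 write would happen here; an NMI arriving in this
	 * window sees the sentinel and bails out ... */

	/* ... and keep the CR3 write ordered before publishing next. */
	barrier();
	loaded_mm = next;
}

int main(void)
{
	struct mm *next = (struct mm *)0x1000;	/* dummy pointer for the demo */

	switch_mm_sketch(next);
	printf("uaccess okay after switch: %d\n", nmi_uaccess_okay_sketch());
	return 0;
}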