diff options
-rw-r--r--  arch/x86/Kconfig                      |  1
-rw-r--r--  arch/x86/include/asm/pgtable-3level.h | 46
-rw-r--r--  arch/x86/kernel/cpu/intel.c           |  8
3 files changed, 29 insertions, 26 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ef85139553f..887bf86c447a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1492,6 +1492,7 @@ config X86_PAE
 config X86_5LEVEL
 	bool "Enable 5-level page tables support"
+	default y
 	select DYNAMIC_MEMORY_LAYOUT
 	select SPARSEMEM_VMEMMAP
 	depends on X86_64
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index e3633795fb22..5afb5e0fe903 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -36,39 +36,41 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
 #define pmd_read_atomic pmd_read_atomic
 /*
- * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with
- * a "*pmdp" dereference done by gcc. Problem is, in certain places
- * where pte_offset_map_lock is called, concurrent page faults are
+ * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with
+ * a "*pmdp" dereference done by GCC. Problem is, in certain places
+ * where pte_offset_map_lock() is called, concurrent page faults are
  * allowed, if the mmap_sem is hold for reading. An example is mincore
  * vs page faults vs MADV_DONTNEED. On the page fault side
- * pmd_populate rightfully does a set_64bit, but if we're reading the
+ * pmd_populate() rightfully does a set_64bit(), but if we're reading the
  * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
- * because gcc will not read the 64bit of the pmd atomically. To fix
- * this all places running pmd_offset_map_lock() while holding the
+ * because GCC will not read the 64-bit value of the pmd atomically.
+ *
+ * To fix this all places running pte_offset_map_lock() while holding the
  * mmap_sem in read mode, shall read the pmdp pointer using this
- * function to know if the pmd is null nor not, and in turn to know if
- * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd
+ * function to know if the pmd is null or not, and in turn to know if
+ * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd
  * operations.
  *
- * Without THP if the mmap_sem is hold for reading, the pmd can only
- * transition from null to not null while pmd_read_atomic runs. So
+ * Without THP if the mmap_sem is held for reading, the pmd can only
+ * transition from null to not null while pmd_read_atomic() runs. So
  * we can always return atomic pmd values with this function.
  *
- * With THP if the mmap_sem is hold for reading, the pmd can become
+ * With THP if the mmap_sem is held for reading, the pmd can become
  * trans_huge or none or point to a pte (and in turn become "stable")
- * at any time under pmd_read_atomic. We could read it really
- * atomically here with a atomic64_read for the THP enabled case (and
+ * at any time under pmd_read_atomic(). We could read it truly
+ * atomically here with an atomic64_read() for the THP enabled case (and
  * it would be a whole lot simpler), but to avoid using cmpxchg8b we
  * only return an atomic pmdval if the low part of the pmdval is later
- * found stable (i.e. pointing to a pte). And we're returning a none
- * pmdval if the low part of the pmd is none. In some cases the high
- * and low part of the pmdval returned may not be consistent if THP is
- * enabled (the low part may point to previously mapped hugepage,
- * while the high part may point to a more recently mapped hugepage),
- * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part
- * of the pmd to be read atomically to decide if the pmd is unstable
- * or not, with the only exception of when the low part of the pmd is
- * zero in which case we return a none pmd.
+ * found to be stable (i.e. pointing to a pte). We are also returning a
+ * 'none' (zero) pmdval if the low part of the pmd is zero.
+ *
+ * In some cases the high and low part of the pmdval returned may not be
+ * consistent if THP is enabled (the low part may point to previously
+ * mapped hugepage, while the high part may point to a more recently
+ * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only
+ * needs the low part of the pmd to be read atomically to decide if the
+ * pmd is unstable or not, with the only exception when the low part
+ * of the pmd is zero, in which case we return a 'none' pmd.
  */
 static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
 {
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 11d5c5950e2d..4a900804a023 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -819,7 +819,7 @@ static const struct _tlb_table intel_tlb_table[] = {
 	{ 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" },
 	{ 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" },
 	{ 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" },
-	{ 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages */" },
+	{ 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" },
 	{ 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
 	{ 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
 	{ 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
@@ -847,7 +847,7 @@ static const struct _tlb_table intel_tlb_table[] = {
 	{ 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" },
 	{ 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
 	{ 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" },
-	{ 0xc2, TLB_DATA_2M_4M, 16, " DTLB 2 MByte/4MByte pages, 4-way associative" },
+	{ 0xc2, TLB_DATA_2M_4M, 16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" },
 	{ 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" },
 	{ 0x00, 0, 0 }
 };
@@ -859,8 +859,8 @@ static void intel_tlb_lookup(const unsigned char desc)
 		return;
 	/* look up this descriptor in the table */
-	for (k = 0; intel_tlb_table[k].descriptor != desc && \
-			intel_tlb_table[k].descriptor != 0; k++)
+	for (k = 0; intel_tlb_table[k].descriptor != desc &&
+	     intel_tlb_table[k].descriptor != 0; k++)
 		;
 	if (intel_tlb_table[k].tlb_type == 0)