aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/Makefile7
-rw-r--r--arch/powerpc/mm/copro_fault.c2
-rw-r--r--arch/powerpc/mm/fault.c4
-rw-r--r--arch/powerpc/mm/hash_low_32.S3
-rw-r--r--arch/powerpc/mm/hash_native_64.c42
-rw-r--r--arch/powerpc/mm/hash_utils_64.c142
-rw-r--r--arch/powerpc/mm/hugetlbpage.c7
-rw-r--r--arch/powerpc/mm/init_32.c2
-rw-r--r--arch/powerpc/mm/mmu_context_iommu.c81
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c56
-rw-r--r--arch/powerpc/mm/numa.c46
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c11
-rw-r--r--arch/powerpc/mm/pgtable-radix.c44
-rw-r--r--arch/powerpc/mm/pgtable.c2
-rw-r--r--arch/powerpc/mm/slb_low.S8
-rw-r--r--arch/powerpc/mm/tlb-radix.c36
16 files changed, 326 insertions, 167 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index f2cea6d5e764..1a4e570f7894 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -7,17 +7,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o mmap.o \
- init_$(CONFIG_WORD_SIZE).o \
- pgtable_$(CONFIG_WORD_SIZE).o
+ init_$(BITS).o pgtable_$(BITS).o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
tlb_nohash_low.o
-obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o
+obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
obj-$(CONFIG_PPC_STD_MMU_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
-obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o
ifeq ($(CONFIG_PPC_STD_MMU_64),y)
obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index bb0354222b11..362954f98029 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -106,6 +106,8 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
switch (REGION_ID(ea)) {
case USER_REGION_ID:
pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
+ if (mm == NULL)
+ return 1;
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index bb1ffc559f38..d0b137d96df1 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -205,7 +205,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
* The return value is 0 if the fault was handled, or the signal
* number if this is a kernel fault that can't be handled here.
*/
-int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
+int do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
enum ctx_state prev_state = exception_enter();
@@ -498,8 +498,8 @@ bad_area_nosemaphore:
bail:
exception_exit(prev_state);
return rc;
-
}
+NOKPROBE_SYMBOL(do_page_fault);
/*
* bad_page_fault is called when we have a bad access from the kernel.
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index 115347f74ce5..09cc50c8dace 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -26,6 +26,7 @@
#include <asm/ppc_asm.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
+#include <asm/export.h>
#ifdef CONFIG_SMP
.section .bss
@@ -33,6 +34,7 @@
.globl mmu_hash_lock
mmu_hash_lock:
.space 4
+EXPORT_SYMBOL(mmu_hash_lock)
#endif /* CONFIG_SMP */
/*
@@ -575,6 +577,7 @@ _GLOBAL(flush_hash_pages)
rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */
stwcx. r8,0,r5 /* update the pte */
bne- 33b
+EXPORT_SYMBOL(flush_hash_pages)
/* Get the address of the primary PTE group in the hash table (r3) */
_GLOBAL(flush_hash_patch_A)
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 0e4e9654bd2c..83ddc0e171b0 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -493,36 +493,6 @@ static void native_hugepage_invalidate(unsigned long vsid,
}
#endif
-static inline int __hpte_actual_psize(unsigned int lp, int psize)
-{
- int i, shift;
- unsigned int mask;
-
- /* start from 1 ignoring MMU_PAGE_4K */
- for (i = 1; i < MMU_PAGE_COUNT; i++) {
-
- /* invalid penc */
- if (mmu_psize_defs[psize].penc[i] == -1)
- continue;
- /*
- * encoding bits per actual page size
- * PTE LP actual page size
- * rrrr rrrz >=8KB
- * rrrr rrzz >=16KB
- * rrrr rzzz >=32KB
- * rrrr zzzz >=64KB
- * .......
- */
- shift = mmu_psize_defs[i].shift - LP_SHIFT;
- if (shift > LP_BITS)
- shift = LP_BITS;
- mask = (1 << shift) - 1;
- if ((lp & mask) == mmu_psize_defs[psize].penc[i])
- return i;
- }
- return -1;
-}
-
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
@@ -538,16 +508,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
size = MMU_PAGE_4K;
a_size = MMU_PAGE_4K;
} else {
- for (size = 0; size < MMU_PAGE_COUNT; size++) {
-
- /* valid entries have a shift value */
- if (!mmu_psize_defs[size].shift)
- continue;
-
- a_size = __hpte_actual_psize(lp, size);
- if (a_size != -1)
- break;
- }
+ size = hpte_page_sizes[lp] & 0xf;
+ a_size = hpte_page_sizes[lp] >> 4;
}
/* This works for all page sizes, and for 256M and 1T segments */
if (cpu_has_feature(CPU_FTR_ARCH_300))
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 0821556e16f4..5503078090cd 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -93,6 +93,9 @@ static unsigned long _SDR1;
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
EXPORT_SYMBOL_GPL(mmu_psize_defs);
+u8 hpte_page_sizes[1 << LP_BITS];
+EXPORT_SYMBOL_GPL(hpte_page_sizes);
+
struct hash_pte *htab_address;
unsigned long htab_size_bytes;
unsigned long htab_hash_mask;
@@ -526,7 +529,7 @@ static bool might_have_hea(void)
*/
#ifdef CONFIG_IBMEBUS
return !cpu_has_feature(CPU_FTR_ARCH_207S) &&
- !firmware_has_feature(FW_FEATURE_SPLPAR);
+ firmware_has_feature(FW_FEATURE_SPLPAR);
#else
return false;
#endif
@@ -564,8 +567,60 @@ static void __init htab_scan_page_sizes(void)
#endif /* CONFIG_HUGETLB_PAGE */
}
+/*
+ * Fill in the hpte_page_sizes[] array.
+ * We go through the mmu_psize_defs[] array looking for all the
+ * supported base/actual page size combinations. Each combination
+ * has a unique pagesize encoding (penc) value in the low bits of
+ * the LP field of the HPTE. For actual page sizes less than 1MB,
+ * some of the upper LP bits are used for RPN bits, meaning that
+ * we need to fill in several entries in hpte_page_sizes[].
+ *
+ * In diagrammatic form, with r = RPN bits and z = page size bits:
+ * PTE LP actual page size
+ * rrrr rrrz >=8KB
+ * rrrr rrzz >=16KB
+ * rrrr rzzz >=32KB
+ * rrrr zzzz >=64KB
+ * ...
+ *
+ * The zzzz bits are implementation-specific but are chosen so that
+ * no encoding for a larger page size uses the same value in its
+ * low-order N bits as the encoding for the 2^(12+N) byte page size
+ * (if it exists).
+ */
+static void init_hpte_page_sizes(void)
+{
+ long int ap, bp;
+ long int shift, penc;
+
+ for (bp = 0; bp < MMU_PAGE_COUNT; ++bp) {
+ if (!mmu_psize_defs[bp].shift)
+ continue; /* not a supported page size */
+ for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
+ penc = mmu_psize_defs[bp].penc[ap];
+ if (penc == -1)
+ continue;
+ shift = mmu_psize_defs[ap].shift - LP_SHIFT;
+ if (shift <= 0)
+ continue; /* should never happen */
+ /*
+ * For page sizes less than 1MB, this loop
+ * replicates the entry for all possible values
+ * of the rrrr bits.
+ */
+ while (penc < (1 << LP_BITS)) {
+ hpte_page_sizes[penc] = (ap << 4) | bp;
+ penc += 1 << shift;
+ }
+ }
+ }
+}
+
static void __init htab_init_page_sizes(void)
{
+ init_hpte_page_sizes();
+
if (!debug_pagealloc_enabled()) {
/*
* Pick a size for the linear mapping. Currently, we only
@@ -711,6 +766,29 @@ int remove_section_mapping(unsigned long start, unsigned long end)
}
#endif /* CONFIG_MEMORY_HOTPLUG */
+static void update_hid_for_hash(void)
+{
+ unsigned long hid0;
+ unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */
+
+ asm volatile("ptesync": : :"memory");
+ /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory");
+ asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
+ /*
+ * now switch the HID
+ */
+ hid0 = mfspr(SPRN_HID0);
+ hid0 &= ~HID0_POWER9_RADIX;
+ mtspr(SPRN_HID0, hid0);
+ asm volatile("isync": : :"memory");
+
+ /* Wait for it to happen */
+ while ((mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
+ cpu_relax();
+}
+
static void __init hash_init_partition_table(phys_addr_t hash_table,
unsigned long htab_size)
{
@@ -737,6 +815,8 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
*/
partition_tb->patb1 = 0;
pr_info("Partition table %p\n", partition_tb);
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ update_hid_for_hash();
/*
* update partition table control register,
* 64 K size.
@@ -949,6 +1029,10 @@ void hash__early_init_mmu_secondary(void)
{
/* Initialize hash table for that CPU */
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ update_hid_for_hash();
+
if (!cpu_has_feature(CPU_FTR_ARCH_300))
mtspr(SPRN_SDR1, _SDR1);
else
@@ -1460,6 +1544,29 @@ out_exit:
local_irq_restore(flags);
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void tm_flush_hash_page(int local)
+{
+ /*
+ * Transactions are not aborted by tlbiel, only tlbie. Without, syncing a
+ * page back to a block device w/PIO could pick up transactional data
+ * (bad!) so we force an abort here. Before the sync the page will be
+ * made read-only, which will flush_hash_page. BIG ISSUE here: if the
+ * kernel uses a page from userspace without unmapping it first, it may
+ * see the speculated version.
+ */
+ if (local && cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+ MSR_TM_ACTIVE(current->thread.regs->msr)) {
+ tm_enable();
+ tm_abort(TM_CAUSE_TLBI);
+ }
+}
+#else
+static inline void tm_flush_hash_page(int local)
+{
+}
+#endif
+
/* WARNING: This is called from hash_low_64.S, if you change this prototype,
* do not forget to update the assembly call site !
*/
@@ -1486,21 +1593,7 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
ssize, local);
} pte_iterate_hashed_end();
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* Transactions are not aborted by tlbiel, only tlbie.
- * Without, syncing a page back to a block device w/ PIO could pick up
- * transactional data (bad!) so we force an abort here. Before the
- * sync the page will be made read-only, which will flush_hash_page.
- * BIG ISSUE here: if the kernel uses a page from userspace without
- * unmapping it first, it may see the speculated version.
- */
- if (local && cpu_has_feature(CPU_FTR_TM) &&
- current->thread.regs &&
- MSR_TM_ACTIVE(current->thread.regs->msr)) {
- tm_enable();
- tm_abort(TM_CAUSE_TLBI);
- }
-#endif
+ tm_flush_hash_page(local);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -1557,22 +1650,7 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
MMU_PAGE_16M, ssize, local);
}
tm_abort:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* Transactions are not aborted by tlbiel, only tlbie.
- * Without, syncing a page back to a block device w/ PIO could pick up
- * transactional data (bad!) so we force an abort here. Before the
- * sync the page will be made read-only, which will flush_hash_page.
- * BIG ISSUE here: if the kernel uses a page from userspace without
- * unmapping it first, it may see the speculated version.
- */
- if (local && cpu_has_feature(CPU_FTR_TM) &&
- current->thread.regs &&
- MSR_TM_ACTIVE(current->thread.regs->msr)) {
- tm_enable();
- tm_abort(TM_CAUSE_TLBI);
- }
-#endif
- return;
+ tm_flush_hash_page(local);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7372ee13eb1e..a5d3ecdabc44 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -1019,8 +1019,15 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
pte = READ_ONCE(*ptep);
mask = _PAGE_PRESENT | _PAGE_READ;
+
+ /*
+ * On some CPUs like the 8xx, _PAGE_RW hence _PAGE_WRITE is defined
+ * as 0 and _PAGE_RO has to be set when a page is not writable
+ */
if (write)
mask |= _PAGE_WRITE;
+ else
+ mask |= _PAGE_RO;
if ((pte_val(pte) & mask) != mask)
return 0;
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 448685fbf27c..8a7c38b8d335 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -137,7 +137,7 @@ void __init MMU_init(void)
if (memblock.memory.cnt > 1) {
#ifndef CONFIG_WII
memblock_enforce_memory_limit(memblock.memory.regions[0].size);
- printk(KERN_WARNING "Only using first contiguous memory region");
+ pr_warn("Only using first contiguous memory region\n");
#else
wii_memory_fixups();
#endif
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index da6a2168ae9e..e0f1c33601dd 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -15,6 +15,9 @@
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
+#include <linux/migrate.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
#include <asm/mmu_context.h>
static DEFINE_MUTEX(mem_list_mutex);
@@ -72,6 +75,55 @@ bool mm_iommu_preregistered(void)
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
+/*
+ * Taken from alloc_migrate_target with changes to remove CMA allocations
+ */
+struct page *new_iommu_non_cma_page(struct page *page, unsigned long private,
+ int **resultp)
+{
+ gfp_t gfp_mask = GFP_USER;
+ struct page *new_page;
+
+ if (PageHuge(page) || PageTransHuge(page) || PageCompound(page))
+ return NULL;
+
+ if (PageHighMem(page))
+ gfp_mask |= __GFP_HIGHMEM;
+
+ /*
+ * We don't want the allocation to force an OOM if possibe
+ */
+ new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN);
+ return new_page;
+}
+
+static int mm_iommu_move_page_from_cma(struct page *page)
+{
+ int ret = 0;
+ LIST_HEAD(cma_migrate_pages);
+
+ /* Ignore huge pages for now */
+ if (PageHuge(page) || PageTransHuge(page) || PageCompound(page))
+ return -EBUSY;
+
+ lru_add_drain();
+ ret = isolate_lru_page(page);
+ if (ret)
+ return ret;
+
+ list_add(&page->lru, &cma_migrate_pages);
+ put_page(page); /* Drop the gup reference */
+
+ ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
+ NULL, 0, MIGRATE_SYNC, MR_CMA);
+ if (ret) {
+ if (!list_empty(&cma_migrate_pages))
+ putback_movable_pages(&cma_migrate_pages);
+ }
+
+ return 0;
+}
+
long mm_iommu_get(unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem)
{
@@ -124,15 +176,36 @@ long mm_iommu_get(unsigned long ua, unsigned long entries,
for (i = 0; i < entries; ++i) {
if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
1/* pages */, 1/* iswrite */, &page)) {
+ ret = -EFAULT;
for (j = 0; j < i; ++j)
- put_page(pfn_to_page(
- mem->hpas[j] >> PAGE_SHIFT));
+ put_page(pfn_to_page(mem->hpas[j] >>
+ PAGE_SHIFT));
vfree(mem->hpas);
kfree(mem);
- ret = -EFAULT;
goto unlock_exit;
}
-
+ /*
+ * If we get a page from the CMA zone, since we are going to
+ * be pinning these entries, we might as well move them out
+ * of the CMA zone if possible. NOTE: faulting in + migration
+ * can be expensive. Batching can be considered later
+ */
+ if (get_pageblock_migratetype(page) == MIGRATE_CMA) {
+ if (mm_iommu_move_page_from_cma(page))
+ goto populate;
+ if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
+ 1/* pages */, 1/* iswrite */,
+ &page)) {
+ ret = -EFAULT;
+ for (j = 0; j < i; ++j)
+ put_page(pfn_to_page(mem->hpas[j] >>
+ PAGE_SHIFT));
+ vfree(mem->hpas);
+ kfree(mem);
+ goto unlock_exit;
+ }
+ }
+populate:
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 7d95bc402dba..c491f2c8f2b9 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -369,44 +369,34 @@ void destroy_context(struct mm_struct *mm)
}
#ifdef CONFIG_SMP
-
-static int mmu_context_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int mmu_ctx_cpu_prepare(unsigned int cpu)
{
- unsigned int cpu = (unsigned int)(long)hcpu;
-
/* We don't touch CPU 0 map, it's allocated at aboot and kept
* around forever
*/
if (cpu == boot_cpuid)
- return NOTIFY_OK;
-
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
- stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
- kfree(stale_map[cpu]);
- stale_map[cpu] = NULL;
-
- /* We also clear the cpu_vm_mask bits of CPUs going away */
- clear_tasks_mm_cpumask(cpu);
- break;
-#endif /* CONFIG_HOTPLUG_CPU */
- }
- return NOTIFY_OK;
+ return 0;
+
+ pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
+ stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
+ return 0;
}
-static struct notifier_block mmu_context_cpu_nb = {
- .notifier_call = mmu_context_cpu_notify,
-};
+static int mmu_ctx_cpu_dead(unsigned int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ if (cpu == boot_cpuid)
+ return 0;
+
+ pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
+ kfree(stale_map[cpu]);
+ stale_map[cpu] = NULL;
+
+ /* We also clear the cpu_vm_mask bits of CPUs going away */
+ clear_tasks_mm_cpumask(cpu);
+#endif
+ return 0;
+}
#endif /* CONFIG_SMP */
@@ -469,7 +459,9 @@ void __init mmu_context_init(void)
#else
stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
- register_cpu_notifier(&mmu_context_cpu_nb);
+ cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
+ "powerpc/mmu/ctx:prepare",
+ mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
#endif
printk(KERN_INFO
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 75b9cd6150cc..a51c188b81f3 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -845,7 +845,7 @@ void __init dump_numa_cpu_topology(void)
return;
for_each_online_node(node) {
- printk(KERN_DEBUG "Node %d CPUs:", node);
+ pr_info("Node %d CPUs:", node);
count = 0;
/*
@@ -856,52 +856,18 @@ void __init dump_numa_cpu_topology(void)
if (cpumask_test_cpu(cpu,
node_to_cpumask_map[node])) {
if (count == 0)
- printk(" %u", cpu);
+ pr_cont(" %u", cpu);
++count;
} else {
if (count > 1)
- printk("-%u", cpu - 1);
+ pr_cont("-%u", cpu - 1);
count = 0;
}
}
if (count > 1)
- printk("-%u", nr_cpu_ids - 1);
- printk("\n");
- }
-}
-
-static void __init dump_numa_memory_topology(void)
-{
- unsigned int node;
- unsigned int count;
-
- if (min_common_depth == -1 || !numa_enabled)
- return;
-
- for_each_online_node(node) {
- unsigned long i;
-
- printk(KERN_DEBUG "Node %d Memory:", node);
-
- count = 0;
-
- for (i = 0; i < memblock_end_of_DRAM();
- i += (1 << SECTION_SIZE_BITS)) {
- if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
- if (count == 0)
- printk(" 0x%lx", i);
- ++count;
- } else {
- if (count > 0)
- printk("-0x%lx", i);
- count = 0;
- }
- }
-
- if (count > 0)
- printk("-0x%lx", i);
- printk("\n");
+ pr_cont("-%u", nr_cpu_ids - 1);
+ pr_cont("\n");
}
}
@@ -947,8 +913,6 @@ void __init initmem_init(void)
if (parse_numa_properties())
setup_nonnuma();
- else
- dump_numa_memory_topology();
memblock_dump_all();
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 34079302cc17..f4f437cbabf1 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -35,7 +35,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
#endif
changed = !pmd_same(*(pmdp), entry);
if (changed) {
- __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
+ __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry));
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
return changed;
@@ -116,3 +116,12 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
return;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/* For use by kexec */
+void mmu_cleanup_all(void)
+{
+ if (radix_enabled())
+ radix__mmu_cleanup_all();
+ else if (mmu_hash_ops.hpte_clear_all)
+ mmu_hash_ops.hpte_clear_all();
+}
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index af897d91d09f..688b54517655 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -294,6 +294,32 @@ found:
return;
}
+static void update_hid_for_radix(void)
+{
+ unsigned long hid0;
+ unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */
+
+ asm volatile("ptesync": : :"memory");
+ /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(1), "i"(0), "i"(2), "r"(0) : "memory");
+ /* prs = 1, ric = 2, rs = 0, r = 1 is = 3 */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
+ asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
+ /*
+ * now switch the HID
+ */
+ hid0 = mfspr(SPRN_HID0);
+ hid0 |= HID0_POWER9_RADIX;
+ mtspr(SPRN_HID0, hid0);
+ asm volatile("isync": : :"memory");
+
+ /* Wait for it to happen */
+ while (!(mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
+ cpu_relax();
+}
+
void __init radix__early_init_mmu(void)
{
unsigned long lpcr;
@@ -345,6 +371,8 @@ void __init radix__early_init_mmu(void)
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
radix_init_native();
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ update_hid_for_radix();
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
@@ -360,6 +388,10 @@ void radix__early_init_mmu_secondary(void)
* update partition table control register and UPRT
*/
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ update_hid_for_radix();
+
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
@@ -368,6 +400,18 @@ void radix__early_init_mmu_secondary(void)
}
}
+void radix__mmu_cleanup_all(void)
+{
+ unsigned long lpcr;
+
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
+ mtspr(SPRN_PTCR, 0);
+ radix__flush_tlb_all();
+ }
+}
+
void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
{
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 0b6fb244d0a1..911fdfb63ec1 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -224,7 +224,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
if (changed) {
if (!is_vm_hugetlb_page(vma))
assert_pte_locked(vma->vm_mm, address);
- __ptep_set_access_flags(ptep, entry);
+ __ptep_set_access_flags(vma->vm_mm, ptep, entry);
flush_tlb_page(vma, address);
}
return changed;
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index 9f1983404e1a..e2974fcd20f1 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -178,11 +178,9 @@ BEGIN_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
b slb_finish_load
-8: /* invalid EA */
- li r10,0 /* BAD_VSID */
- li r9,0 /* BAD_VSID */
- li r11,SLB_VSID_USER /* flags don't much matter */
- b slb_finish_load
+8: /* invalid EA - return an error indication */
+ crset 4*cr0+eq /* indicate failure */
+ blr
/*
* Finish loading of an SLB entry and return
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 48df05ef5231..3493cf4e0452 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -50,6 +50,8 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
__tlbiel_pid(pid, set, ric);
}
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
return;
}
@@ -83,6 +85,8 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("ptesync": : :"memory");
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
}
static inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -175,7 +179,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
if (unlikely(pid == MMU_NO_CONTEXT))
goto no_context;
- if (!mm_is_core_local(mm)) {
+ if (!mm_is_thread_local(mm)) {
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
if (lock_tlbie)
@@ -201,7 +205,7 @@ void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
if (unlikely(pid == MMU_NO_CONTEXT))
goto no_context;
- if (!mm_is_core_local(mm)) {
+ if (!mm_is_thread_local(mm)) {
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
if (lock_tlbie)
@@ -226,7 +230,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
pid = mm ? mm->context.id : 0;
if (unlikely(pid == MMU_NO_CONTEXT))
goto bail;
- if (!mm_is_core_local(mm)) {
+ if (!mm_is_thread_local(mm)) {
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
if (lock_tlbie)
@@ -321,7 +325,7 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
{
unsigned long pid;
unsigned long addr;
- int local = mm_is_core_local(mm);
+ int local = mm_is_thread_local(mm);
unsigned long ap = mmu_get_ap(psize);
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
@@ -400,3 +404,27 @@ void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
+
+void radix__flush_tlb_all(void)
+{
+ unsigned long rb,prs,r,rs;
+ unsigned long ric = RIC_FLUSH_ALL;
+
+ rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
+ prs = 0; /* partition scoped */
+ r = 1; /* raidx format */
+ rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
+
+ asm volatile("ptesync": : :"memory");
+ /*
+ * now flush guest entries by passing PRS = 1 and LPID != 0
+ */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
+ /*
+ * now flush host entires by passing PRS = 0 and LPID == 0
+ */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}