diff options
author | Arnd Bergmann <arnd@arndb.de> | 2018-03-09 14:13:42 +0100 |
---|---|---|
committer | Arnd Bergmann <arnd@arndb.de> | 2018-03-16 10:56:03 +0100 |
commit | bb9d812643d8a121df7d614a2b9c60193a92deb0 (patch) | |
tree | 419096f57ca0501d8813151a5236387074edb4ea /arch/tile/mm | |
parent | 4ba66a9760722ccbb691b8f7116cad2f791cca7b (diff) |
arch: remove tile port
The Tile architecture port was added by Chris Metcalf in 2010, and
maintained until early 2018 when he orphaned it due to his departure
from Mellanox, and nobody else stepped up to maintain it. The product
line is still around in the form of the BlueField SoC, but no longer
uses the Tile architecture.
There are also still products for sale with Tile-GX SoCs, notably the
Mikrotik CCR router family. The products all use old (linux-3.3) kernels
with lots of patches and won't be upgraded by their manufacturers. There
have been efforts to port both OpenWRT and Debian to these, but both
projects have stalled and are very unlikely to be continued in the future.
Given that we are reasonably sure that nobody is still using the port
with an upstream kernel any more, it seems better to remove it now while
the port is in good shape than to let it bitrot for a few years first.
Cc: Chris Metcalf <chris.d.metcalf@gmail.com>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Link: http://www.mellanox.com/page/npu_multicore_overview
Link: https://jenkins.debian.net/view/rebootstrap/job/rebootstrap_tilegx_gcc7/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Diffstat (limited to 'arch/tile/mm')
-rw-r--r-- | arch/tile/mm/Makefile | 9 | ||||
-rw-r--r-- | arch/tile/mm/elf.c | 165 | ||||
-rw-r--r-- | arch/tile/mm/extable.c | 30 | ||||
-rw-r--r-- | arch/tile/mm/fault.c | 924 | ||||
-rw-r--r-- | arch/tile/mm/highmem.c | 277 | ||||
-rw-r--r-- | arch/tile/mm/homecache.c | 428 | ||||
-rw-r--r-- | arch/tile/mm/hugetlbpage.c | 348 | ||||
-rw-r--r-- | arch/tile/mm/init.c | 956 | ||||
-rw-r--r-- | arch/tile/mm/migrate.h | 56 | ||||
-rw-r--r-- | arch/tile/mm/migrate_32.S | 192 | ||||
-rw-r--r-- | arch/tile/mm/migrate_64.S | 167 | ||||
-rw-r--r-- | arch/tile/mm/mmap.c | 93 | ||||
-rw-r--r-- | arch/tile/mm/pgtable.c | 550 |
13 files changed, 0 insertions, 4195 deletions
diff --git a/arch/tile/mm/Makefile b/arch/tile/mm/Makefile deleted file mode 100644 index e252aeddc17d..000000000000 --- a/arch/tile/mm/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# -# Makefile for the linux tile-specific parts of the memory manager. -# - -obj-y := init.o pgtable.o fault.o extable.o elf.o \ - mmap.o homecache.o migrate_$(BITS).o - -obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c deleted file mode 100644 index 889901824400..000000000000 --- a/arch/tile/mm/elf.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#include <linux/mm.h> -#include <linux/pagemap.h> -#include <linux/binfmts.h> -#include <linux/compat.h> -#include <linux/mman.h> -#include <linux/file.h> -#include <linux/elf.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/sections.h> -#include <asm/vdso.h> -#include <arch/sim.h> - -/* Notify a running simulator, if any, that an exec just occurred. 
*/ -static void sim_notify_exec(const char *binary_name) -{ - unsigned char c; - do { - c = *binary_name++; - __insn_mtspr(SPR_SIM_CONTROL, - (SIM_CONTROL_OS_EXEC - | (c << _SIM_CONTROL_OPERATOR_BITS))); - - } while (c); -} - -static int notify_exec(struct mm_struct *mm) -{ - int ret = 0; - char *buf, *path; - struct vm_area_struct *vma; - struct file *exe_file; - - if (!sim_is_simulator()) - return 1; - - buf = (char *) __get_free_page(GFP_KERNEL); - if (buf == NULL) - return 0; - - exe_file = get_mm_exe_file(mm); - if (exe_file == NULL) - goto done_free; - - path = file_path(exe_file, buf, PAGE_SIZE); - if (IS_ERR(path)) - goto done_put; - - down_read(&mm->mmap_sem); - for (vma = current->mm->mmap; ; vma = vma->vm_next) { - if (vma == NULL) { - up_read(&mm->mmap_sem); - goto done_put; - } - if (vma->vm_file == exe_file) - break; - } - - /* - * Notify simulator of an ET_DYN object so we know the load address. - * The somewhat cryptic overuse of SIM_CONTROL_DLOPEN allows us - * to be backward-compatible with older simulator releases. - */ - if (vma->vm_start == (ELF_ET_DYN_BASE & PAGE_MASK)) { - char buf[64]; - int i; - - snprintf(buf, sizeof(buf), "0x%lx:@", vma->vm_start); - for (i = 0; ; ++i) { - char c = buf[i]; - __insn_mtspr(SPR_SIM_CONTROL, - (SIM_CONTROL_DLOPEN - | (c << _SIM_CONTROL_OPERATOR_BITS))); - if (c == '\0') { - ret = 1; /* success */ - break; - } - } - } - up_read(&mm->mmap_sem); - - sim_notify_exec(path); -done_put: - fput(exe_file); -done_free: - free_page((unsigned long)buf); - return ret; -} - -/* Notify a running simulator, if any, that we loaded an interpreter. 
*/ -static void sim_notify_interp(unsigned long load_addr) -{ - size_t i; - for (i = 0; i < sizeof(load_addr); i++) { - unsigned char c = load_addr >> (i * 8); - __insn_mtspr(SPR_SIM_CONTROL, - (SIM_CONTROL_OS_INTERP - | (c << _SIM_CONTROL_OPERATOR_BITS))); - } -} - - -int arch_setup_additional_pages(struct linux_binprm *bprm, - int executable_stack) -{ - struct mm_struct *mm = current->mm; - int retval = 0; - - /* - * Notify the simulator that an exec just occurred. - * If we can't find the filename of the mapping, just use - * whatever was passed as the linux_binprm filename. - */ - if (!notify_exec(mm)) - sim_notify_exec(bprm->filename); - - down_write(&mm->mmap_sem); - - retval = setup_vdso_pages(); - -#ifndef __tilegx__ - /* - * Set up a user-interrupt mapping here; the user can't - * create one themselves since it is above TASK_SIZE. - * We make it unwritable by default, so the model for adding - * interrupt vectors always involves an mprotect. - */ - if (!retval) { - unsigned long addr = MEM_USER_INTRPT; - addr = mmap_region(NULL, addr, INTRPT_SIZE, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0, NULL); - if (addr > (unsigned long) -PAGE_SIZE) - retval = (int) addr; - } -#endif - - up_write(&mm->mmap_sem); - - return retval; -} - - -void elf_plat_init(struct pt_regs *regs, unsigned long load_addr) -{ - /* Zero all registers. */ - memset(regs, 0, sizeof(*regs)); - - /* Report the interpreter's load address. */ - sim_notify_interp(load_addr); -} diff --git a/arch/tile/mm/extable.c b/arch/tile/mm/extable.c deleted file mode 100644 index aeaf20c7aaa4..000000000000 --- a/arch/tile/mm/extable.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#include <linux/extable.h> -#include <linux/spinlock.h> -#include <linux/uaccess.h> - -int fixup_exception(struct pt_regs *regs) -{ - const struct exception_table_entry *fixup; - - fixup = search_exception_tables(regs->pc); - if (fixup) { - regs->pc = fixup->fixup; - return 1; - } - - return 0; -} diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c deleted file mode 100644 index f58fa06a2214..000000000000 --- a/arch/tile/mm/fault.c +++ /dev/null @@ -1,924 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. 
- * - * From i386 code copyright (C) 1995 Linus Torvalds - */ - -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/sched/debug.h> -#include <linux/sched/task.h> -#include <linux/sched/task_stack.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/tty.h> -#include <linux/vt_kern.h> /* For unblank_screen() */ -#include <linux/highmem.h> -#include <linux/extable.h> -#include <linux/kprobes.h> -#include <linux/hugetlb.h> -#include <linux/syscalls.h> -#include <linux/uaccess.h> -#include <linux/kdebug.h> - -#include <asm/pgalloc.h> -#include <asm/sections.h> -#include <asm/traps.h> -#include <asm/syscalls.h> - -#include <arch/interrupts.h> - -static noinline void force_sig_info_fault(const char *type, int si_signo, - int si_code, unsigned long address, - int fault_num, - struct task_struct *tsk, - struct pt_regs *regs) -{ - siginfo_t info; - - if (unlikely(tsk->pid < 2)) { - panic("Signal %d (code %d) at %#lx sent to %s!", - si_signo, si_code & 0xffff, address, - is_idle_task(tsk) ? "the idle task" : "init"); - } - - info.si_signo = si_signo; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *)address; - info.si_trapno = fault_num; - trace_unhandled_signal(type, regs, address, si_signo); - force_sig_info(si_signo, &info, tsk); -} - -#ifndef __tilegx__ -/* - * Synthesize the fault a PL0 process would get by doing a word-load of - * an unaligned address or a high kernel address. 
- */ -SYSCALL_DEFINE1(cmpxchg_badaddr, unsigned long, address) -{ - struct pt_regs *regs = current_pt_regs(); - - if (address >= PAGE_OFFSET) - force_sig_info_fault("atomic segfault", SIGSEGV, SEGV_MAPERR, - address, INT_DTLB_MISS, current, regs); - else - force_sig_info_fault("atomic alignment fault", SIGBUS, - BUS_ADRALN, address, - INT_UNALIGN_DATA, current, regs); - - /* - * Adjust pc to point at the actual instruction, which is unusual - * for syscalls normally, but is appropriate when we are claiming - * that a syscall swint1 caused a page fault or bus error. - */ - regs->pc -= 8; - - /* - * Mark this as a caller-save interrupt, like a normal page fault, - * so that when we go through the signal handler path we will - * properly restore r0, r1, and r2 for the signal handler arguments. - */ - regs->flags |= PT_FLAGS_CALLER_SAVES; - - return 0; -} -#endif - -static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) -{ - unsigned index = pgd_index(address); - pgd_t *pgd_k; - pud_t *pud, *pud_k; - pmd_t *pmd, *pmd_k; - - pgd += index; - pgd_k = init_mm.pgd + index; - - if (!pgd_present(*pgd_k)) - return NULL; - - pud = pud_offset(pgd, address); - pud_k = pud_offset(pgd_k, address); - if (!pud_present(*pud_k)) - return NULL; - - pmd = pmd_offset(pud, address); - pmd_k = pmd_offset(pud_k, address); - if (!pmd_present(*pmd_k)) - return NULL; - if (!pmd_present(*pmd)) - set_pmd(pmd, *pmd_k); - else - BUG_ON(pmd_ptfn(*pmd) != pmd_ptfn(*pmd_k)); - return pmd_k; -} - -/* - * Handle a fault on the vmalloc area. - */ -static inline int vmalloc_fault(pgd_t *pgd, unsigned long address) -{ - pmd_t *pmd_k; - pte_t *pte_k; - - /* Make sure we are in vmalloc area */ - if (!(address >= VMALLOC_START && address < VMALLOC_END)) - return -1; - - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. 
- */ - pmd_k = vmalloc_sync_one(pgd, address); - if (!pmd_k) - return -1; - pte_k = pte_offset_kernel(pmd_k, address); - if (!pte_present(*pte_k)) - return -1; - return 0; -} - -/* Wait until this PTE has completed migration. */ -static void wait_for_migration(pte_t *pte) -{ - if (pte_migrating(*pte)) { - /* - * Wait until the migrater fixes up this pte. - * We scale the loop count by the clock rate so we'll wait for - * a few seconds here. - */ - int retries = 0; - int bound = get_clock_rate(); - while (pte_migrating(*pte)) { - barrier(); - if (++retries > bound) - panic("Hit migrating PTE (%#llx) and page PFN %#lx still migrating", - pte->val, pte_pfn(*pte)); - } - } -} - -/* - * It's not generally safe to use "current" to get the page table pointer, - * since we might be running an oprofile interrupt in the middle of a - * task switch. - */ -static pgd_t *get_current_pgd(void) -{ - HV_Context ctx = hv_inquire_context(); - unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT; - struct page *pgd_page = pfn_to_page(pgd_pfn); - BUG_ON(PageHighMem(pgd_page)); - return (pgd_t *) __va(ctx.page_table); -} - -/* - * We can receive a page fault from a migrating PTE at any time. - * Handle it by just waiting until the fault resolves. - * - * It's also possible to get a migrating kernel PTE that resolves - * itself during the downcall from hypervisor to Linux. We just check - * here to see if the PTE seems valid, and if so we retry it. - * - * NOTE! We MUST NOT take any locks for this case. We may be in an - * interrupt or a critical region, and must do as little as possible. - * Similarly, we can't use atomic ops here, since we may be handling a - * fault caused by an atomic op access. - * - * If we find a migrating PTE while we're in an NMI context, and we're - * at a PC that has a registered exception handler, we don't wait, - * since this thread may (e.g.) have been interrupted while migrating - * its own stack, which would then cause us to self-deadlock. 
- */ -static int handle_migrating_pte(pgd_t *pgd, int fault_num, - unsigned long address, unsigned long pc, - int is_kernel_mode, int write) -{ - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - pte_t pteval; - - if (pgd_addr_invalid(address)) - return 0; - - pgd += pgd_index(address); - pud = pud_offset(pgd, address); - if (!pud || !pud_present(*pud)) - return 0; - pmd = pmd_offset(pud, address); - if (!pmd || !pmd_present(*pmd)) - return 0; - pte = pmd_huge_page(*pmd) ? ((pte_t *)pmd) : - pte_offset_kernel(pmd, address); - pteval = *pte; - if (pte_migrating(pteval)) { - if (in_nmi() && search_exception_tables(pc)) - return 0; - wait_for_migration(pte); - return 1; - } - - if (!is_kernel_mode || !pte_present(pteval)) - return 0; - if (fault_num == INT_ITLB_MISS) { - if (pte_exec(pteval)) - return 1; - } else if (write) { - if (pte_write(pteval)) - return 1; - } else { - if (pte_read(pteval)) - return 1; - } - - return 0; -} - -/* - * This routine is responsible for faulting in user pages. - * It passes the work off to one of the appropriate routines. - * It returns true if the fault was successfully handled. - */ -static int handle_page_fault(struct pt_regs *regs, - int fault_num, - int is_page_fault, - unsigned long address, - int write) -{ - struct task_struct *tsk; - struct mm_struct *mm; - struct vm_area_struct *vma; - unsigned long stack_offset; - int fault; - int si_code; - int is_kernel_mode; - pgd_t *pgd; - unsigned int flags; - - /* on TILE, protection faults are always writes */ - if (!is_page_fault) - write = 1; - - flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - - is_kernel_mode = !user_mode(regs); - - tsk = validate_current(); - - /* - * Check to see if we might be overwriting the stack, and bail - * out if so. The page fault code is a relatively likely - * place to get trapped in an infinite regress, and once we - * overwrite the whole stack, it becomes very hard to recover. 
- */ - stack_offset = stack_pointer & (THREAD_SIZE-1); - if (stack_offset < THREAD_SIZE / 8) { - pr_alert("Potential stack overrun: sp %#lx\n", stack_pointer); - show_regs(regs); - pr_alert("Killing current process %d/%s\n", - tsk->pid, tsk->comm); - do_group_exit(SIGKILL); - } - - /* - * Early on, we need to check for migrating PTE entries; - * see homecache.c. If we find a migrating PTE, we wait until - * the backing page claims to be done migrating, then we proceed. - * For kernel PTEs, we rewrite the PTE and return and retry. - * Otherwise, we treat the fault like a normal "no PTE" fault, - * rather than trying to patch up the existing PTE. - */ - pgd = get_current_pgd(); - if (handle_migrating_pte(pgd, fault_num, address, regs->pc, - is_kernel_mode, write)) - return 1; - - si_code = SEGV_MAPERR; - - /* - * We fault-in kernel-space virtual memory on-demand. The - * 'reference' page table is init_mm.pgd. - * - * NOTE! We MUST NOT take any locks for this case. We may - * be in an interrupt or a critical region, and should - * only copy the information from the master page table, - * nothing more. - * - * This verifies that the fault happens in kernel space - * and that the fault was not a protection fault. - */ - if (unlikely(address >= TASK_SIZE && - !is_arch_mappable_range(address, 0))) { - if (is_kernel_mode && is_page_fault && - vmalloc_fault(pgd, address) >= 0) - return 1; - /* - * Don't take the mm semaphore here. If we fixup a prefetch - * fault we could otherwise deadlock. - */ - mm = NULL; /* happy compiler */ - vma = NULL; - goto bad_area_nosemaphore; - } - - /* - * If we're trying to touch user-space addresses, we must - * be either at PL0, or else with interrupts enabled in the - * kernel, so either way we can re-enable interrupts here - * unless we are doing atomic access to user space with - * interrupts disabled. 
- */ - if (!(regs->flags & PT_FLAGS_DISABLE_IRQ)) - local_irq_enable(); - - mm = tsk->mm; - - /* - * If we're in an interrupt, have no user context or are running in an - * region with pagefaults disabled then we must not take the fault. - */ - if (pagefault_disabled() || !mm) { - vma = NULL; /* happy compiler */ - goto bad_area_nosemaphore; - } - - if (!is_kernel_mode) - flags |= FAULT_FLAG_USER; - - /* - * When running in the kernel we expect faults to occur only to - * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunately, in the case of an - * erroneous fault occurring in a code path which already holds mmap_sem - * we will deadlock attempting to validate the fault against the - * address space. Luckily the kernel only validly references user - * space from well defined areas of code, which are listed in the - * exceptions table. - * - * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibility of a deadlock. - * Attempt to lock the address space, if we cannot we then validate the - * source. If this is invalid we can skip the address space check, - * thus avoiding the deadlock. - */ - if (!down_read_trylock(&mm->mmap_sem)) { - if (is_kernel_mode && - !search_exception_tables(regs->pc)) { - vma = NULL; /* happy compiler */ - goto bad_area_nosemaphore; - } - -retry: - down_read(&mm->mmap_sem); - } - - vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) - goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; - if (regs->sp < PAGE_OFFSET) { - /* - * accessing the stack below sp is always a bug. - */ - if (address < regs->sp) - goto bad_area; - } - if (expand_stack(vma, address)) - goto bad_area; - -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. 
- */ -good_area: - si_code = SEGV_ACCERR; - if (fault_num == INT_ITLB_MISS) { - if (!(vma->vm_flags & VM_EXEC)) - goto bad_area; - } else if (write) { -#ifdef TEST_VERIFY_AREA - if (!is_page_fault && regs->cs == KERNEL_CS) - pr_err("WP fault at " REGFMT "\n", regs->eip); -#endif - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - flags |= FAULT_FLAG_WRITE; - } else { - if (!is_page_fault || !(vma->vm_flags & VM_READ)) - goto bad_area; - } - - /* - * If for any reason at all we couldn't handle the fault, - * make sure we exit gracefully rather than endlessly redo - * the fault. - */ - fault = handle_mm_fault(vma, address, flags); - - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) - return 0; - - if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGSEGV) - goto bad_area; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); - } - if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; - if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; - flags |= FAULT_FLAG_TRIED; - - /* - * No need to up_read(&mm->mmap_sem) as we would - * have already released it in __lock_page_or_retry - * in mm/filemap.c. - */ - goto retry; - } - } - -#if CHIP_HAS_TILE_DMA() - /* If this was a DMA TLB fault, restart the DMA engine. */ - switch (fault_num) { - case INT_DMATLB_MISS: - case INT_DMATLB_MISS_DWNCL: - case INT_DMATLB_ACCESS: - case INT_DMATLB_ACCESS_DWNCL: - __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); - break; - } -#endif - - up_read(&mm->mmap_sem); - return 1; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); - -bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if (!is_kernel_mode) { - /* - * It's possible to have interrupts off here. 
- */ - local_irq_enable(); - - force_sig_info_fault("segfault", SIGSEGV, si_code, address, - fault_num, tsk, regs); - return 0; - } - -no_context: - /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) - return 0; - -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. - */ - - bust_spinlocks(1); - - /* FIXME: no lookup_address() yet */ -#ifdef SUPPORT_LOOKUP_ADDRESS - if (fault_num == INT_ITLB_MISS) { - pte_t *pte = lookup_address(address); - - if (pte && pte_present(*pte) && !pte_exec_kernel(*pte)) - pr_crit("kernel tried to execute non-executable page - exploit attempt? (uid: %d)\n", - current->uid); - } -#endif - if (address < PAGE_SIZE) - pr_alert("Unable to handle kernel NULL pointer dereference\n"); - else - pr_alert("Unable to handle kernel paging request\n"); - pr_alert(" at virtual address " REGFMT ", pc " REGFMT "\n", - address, regs->pc); - - show_regs(regs); - - if (unlikely(tsk->pid < 2)) { - panic("Kernel page fault running %s!", - is_idle_task(tsk) ? "the idle task" : "init"); - } - - /* - * More FIXME: we should probably copy the i386 here and - * implement a generic die() routine. Not today. - */ -#ifdef SUPPORT_DIE - die("Oops", regs); -#endif - bust_spinlocks(1); - - do_group_exit(SIGKILL); - -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ -out_of_memory: - up_read(&mm->mmap_sem); - if (is_kernel_mode) - goto no_context; - pagefault_out_of_memory(); - return 0; - -do_sigbus: - up_read(&mm->mmap_sem); - - /* Kernel mode? Handle exceptions or die */ - if (is_kernel_mode) - goto no_context; - - force_sig_info_fault("bus error", SIGBUS, BUS_ADRERR, address, - fault_num, tsk, regs); - return 0; -} - -#ifndef __tilegx__ - -/* We must release ICS before panicking or we won't get anywhere. */ -#define ics_panic(fmt, ...) 
\ -do { \ - __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); \ - panic(fmt, ##__VA_ARGS__); \ -} while (0) - -/* - * When we take an ITLB or DTLB fault or access violation in the - * supervisor while the critical section bit is set, the hypervisor is - * reluctant to write new values into the EX_CONTEXT_K_x registers, - * since that might indicate we have not yet squirreled the SPR - * contents away and can thus safely take a recursive interrupt. - * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_K_2. - * - * Note that this routine is called before homecache_tlb_defer_enter(), - * which means that we can properly unlock any atomics that might - * be used there (good), but also means we must be very sensitive - * to not touch any data structures that might be located in memory - * that could migrate, as we could be entering the kernel on a dataplane - * cpu that has been deferring kernel TLB updates. This means, for - * example, that we can't migrate init_mm or its pgd. - */ -struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, - unsigned long address, - unsigned long info) -{ - unsigned long pc = info & ~1; - int write = info & 1; - pgd_t *pgd = get_current_pgd(); - - /* Retval is 1 at first since we will handle the fault fully. */ - struct intvec_state state = { - do_page_fault, fault_num, address, write, 1 - }; - - /* Validate that we are plausibly in the right routine. */ - if ((pc & 0x7) != 0 || pc < PAGE_OFFSET || - (fault_num != INT_DTLB_MISS && - fault_num != INT_DTLB_ACCESS)) { - unsigned long old_pc = regs->pc; - regs->pc = pc; - ics_panic("Bad ICS page fault args: old PC %#lx, fault %d/%d at %#lx", - old_pc, fault_num, write, address); - } - - /* We might be faulting on a vmalloc page, so check that first. 
*/ - if (fault_num != INT_DTLB_ACCESS && vmalloc_fault(pgd, address) >= 0) - return state; - - /* - * If we faulted with ICS set in sys_cmpxchg, we are providing - * a user syscall service that should generate a signal on - * fault. We didn't set up a kernel stack on initial entry to - * sys_cmpxchg, but instead had one set up by the fault, which - * (because sys_cmpxchg never releases ICS) came to us via the - * SYSTEM_SAVE_K_2 mechanism, and thus EX_CONTEXT_K_[01] are - * still referencing the original user code. We release the - * atomic lock and rewrite pt_regs so that it appears that we - * came from user-space directly, and after we finish the - * fault we'll go back to user space and re-issue the swint. - * This way the backtrace information is correct if we need to - * emit a stack dump at any point while handling this. - * - * Must match register use in sys_cmpxchg(). - */ - if (pc >= (unsigned long) sys_cmpxchg && - pc < (unsigned long) __sys_cmpxchg_end) { -#ifdef CONFIG_SMP - /* Don't unlock before we could have locked. */ - if (pc >= (unsigned long)__sys_cmpxchg_grab_lock) { - int *lock_ptr = (int *)(regs->regs[ATOMIC_LOCK_REG]); - __atomic_fault_unlock(lock_ptr); - } -#endif - regs->sp = regs->regs[27]; - } - - /* - * We can also fault in the atomic assembly, in which - * case we use the exception table to do the first-level fixup. - * We may re-fixup again in the real fault handler if it - * turns out the faulting address is just bad, and not, - * for example, migrating. - */ - else if (pc >= (unsigned long) __start_atomic_asm_code && - pc < (unsigned long) __end_atomic_asm_code) { - const struct exception_table_entry *fixup; -#ifdef CONFIG_SMP - /* Unlock the atomic lock. 
*/ - int *lock_ptr = (int *)(regs->regs[ATOMIC_LOCK_REG]); - __atomic_fault_unlock(lock_ptr); -#endif - fixup = search_exception_tables(pc); - if (!fixup) - ics_panic("ICS atomic fault not in table: PC %#lx, fault %d", - pc, fault_num); - regs->pc = fixup->fixup; - regs->ex1 = PL_ICS_EX1(KERNEL_PL, 0); - } - - /* - * Now that we have released the atomic lock (if necessary), - * it's safe to spin if the PTE that caused the fault was migrating. - */ - if (fault_num == INT_DTLB_ACCESS) - write = 1; - if (handle_migrating_pte(pgd, fault_num, address, pc, 1, write)) - return state; - - /* Return zero so that we continue on with normal fault handling. */ - state.retval = 0; - return state; -} - -#endif /* !__tilegx__ */ - -/* - * This routine handles page faults. It determines the address, and the - * problem, and then passes it handle_page_fault() for normal DTLB and - * ITLB issues, and for DMA or SN processor faults when we are in user - * space. For the latter, if we're in kernel mode, we just save the - * interrupt away appropriately and return immediately. We can't do - * page faults for user code while in kernel mode. - */ -static inline void __do_page_fault(struct pt_regs *regs, int fault_num, - unsigned long address, unsigned long write) -{ - int is_page_fault; - -#ifdef CONFIG_KPROBES - /* - * This is to notify the fault handler of the kprobes. The - * exception code is redundant as it is also carried in REGS, - * but we pass it anyhow. - */ - if (notify_die(DIE_PAGE_FAULT, "page fault", regs, -1, - regs->faultnum, SIGSEGV) == NOTIFY_STOP) - return; -#endif - -#ifdef __tilegx__ - /* - * We don't need early do_page_fault_ics() support, since unlike - * Pro we don't need to worry about unlocking the atomic locks. - * There is only one current case in GX where we touch any memory - * under ICS other than our own kernel stack, and we handle that - * here. 
(If we crash due to trying to touch our own stack, - * we're in too much trouble for C code to help out anyway.) - */ - if (write & ~1) { - unsigned long pc = write & ~1; - if (pc >= (unsigned long) __start_unalign_asm_code && - pc < (unsigned long) __end_unalign_asm_code) { - struct thread_info *ti = current_thread_info(); - /* - * Our EX_CONTEXT is still what it was from the - * initial unalign exception, but now we've faulted - * on the JIT page. We would like to complete the - * page fault however is appropriate, and then retry - * the instruction that caused the unalign exception. - * Our state has been "corrupted" by setting the low - * bit in "sp", and stashing r0..r3 in the - * thread_info area, so we revert all of that, then - * continue as if this were a normal page fault. - */ - regs->sp &= ~1UL; - regs->regs[0] = ti->unalign_jit_tmp[0]; - regs->regs[1] = ti->unalign_jit_tmp[1]; - regs->regs[2] = ti->unalign_jit_tmp[2]; - regs->regs[3] = ti->unalign_jit_tmp[3]; - write &= 1; - } else { - pr_alert("%s/%d: ICS set at page fault at %#lx: %#lx\n", - current->comm, current->pid, pc, address); - show_regs(regs); - do_group_exit(SIGKILL); - } - } -#else - /* This case should have been handled by do_page_fault_ics(). */ - BUG_ON(write & ~1); -#endif - -#if CHIP_HAS_TILE_DMA() - /* - * If it's a DMA fault, suspend the transfer while we're - * handling the miss; we'll restart after it's handled. If we - * don't suspend, it's possible that this process could swap - * out and back in, and restart the engine since the DMA is - * still 'running'. - */ - if (fault_num == INT_DMATLB_MISS || - fault_num == INT_DMATLB_ACCESS || - fault_num == INT_DMATLB_MISS_DWNCL || - fault_num == INT_DMATLB_ACCESS_DWNCL) { - __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__SUSPEND_MASK); - while (__insn_mfspr(SPR_DMA_USER_STATUS) & - SPR_DMA_STATUS__BUSY_MASK) - ; - } -#endif - - /* Validate fault num and decide if this is a first-time page fault. 
*/ - switch (fault_num) { - case INT_ITLB_MISS: - case INT_DTLB_MISS: -#if CHIP_HAS_TILE_DMA() - case INT_DMATLB_MISS: - case INT_DMATLB_MISS_DWNCL: -#endif - is_page_fault = 1; - break; - - case INT_DTLB_ACCESS: -#if CHIP_HAS_TILE_DMA() - case INT_DMATLB_ACCESS: - case INT_DMATLB_ACCESS_DWNCL: -#endif - is_page_fault = 0; - break; - - default: - panic("Bad fault number %d in do_page_fault", fault_num); - } - -#if CHIP_HAS_TILE_DMA() - if (!user_mode(regs)) { - struct async_tlb *async; - switch (fault_num) { -#if CHIP_HAS_TILE_DMA() - case INT_DMATLB_MISS: - case INT_DMATLB_ACCESS: - case INT_DMATLB_MISS_DWNCL: - case INT_DMATLB_ACCESS_DWNCL: - async = &current->thread.dma_async_tlb; - break; -#endif - default: - async = NULL; - } - if (async) { - - /* - * No vmalloc check required, so we can allow - * interrupts immediately at this point. - */ - local_irq_enable(); - - set_thread_flag(TIF_ASYNC_TLB); - if (async->fault_num != 0) { - panic("Second async fault %d; old fault was %d (%#lx/%ld)", - fault_num, async->fault_num, - address, write); - } - BUG_ON(fault_num == 0); - async->fault_num = fault_num; - async->is_fault = is_page_fault; - async->is_write = write; - async->address = address; - return; - } - } -#endif - - handle_page_fault(regs, fault_num, is_page_fault, address, write); -} - -void do_page_fault(struct pt_regs *regs, int fault_num, - unsigned long address, unsigned long write) -{ - __do_page_fault(regs, fault_num, address, write); -} - -#if CHIP_HAS_TILE_DMA() -/* - * This routine effectively re-issues asynchronous page faults - * when we are returning to user space. - */ -void do_async_page_fault(struct pt_regs *regs) -{ - struct async_tlb *async = &current->thread.dma_async_tlb; - - /* - * Clear thread flag early. If we re-interrupt while processing - * code here, we will reset it and recall this routine before - * returning to user space. 
- */ - clear_thread_flag(TIF_ASYNC_TLB); - - if (async->fault_num) { - /* - * Clear async->fault_num before calling the page-fault - * handler so that if we re-interrupt before returning - * from the function we have somewhere to put the - * information from the new interrupt. - */ - int fault_num = async->fault_num; - async->fault_num = 0; - handle_page_fault(regs, fault_num, async->is_fault, - async->address, async->is_write); - } -} -#endif /* CHIP_HAS_TILE_DMA() */ - - -void vmalloc_sync_all(void) -{ -#ifdef __tilegx__ - /* Currently all L1 kernel pmd's are static and shared. */ - BUILD_BUG_ON(pgd_index(VMALLOC_END - PAGE_SIZE) != - pgd_index(VMALLOC_START)); -#else - /* - * Note that races in the updates of insync and start aren't - * problematic: insync can only get set bits added, and updates to - * start are only improving performance (without affecting correctness - * if undone). - */ - static DECLARE_BITMAP(insync, PTRS_PER_PGD); - static unsigned long start = PAGE_OFFSET; - unsigned long address; - - BUILD_BUG_ON(PAGE_OFFSET & ~PGDIR_MASK); - for (address = start; address >= PAGE_OFFSET; address += PGDIR_SIZE) { - if (!test_bit(pgd_index(address), insync)) { - unsigned long flags; - struct list_head *pos; - - spin_lock_irqsave(&pgd_lock, flags); - list_for_each(pos, &pgd_list) - if (!vmalloc_sync_one(list_to_pgd(pos), - address)) { - /* Must be at first entry in list. */ - BUG_ON(pos != pgd_list.next); - break; - } - spin_unlock_irqrestore(&pgd_lock, flags); - if (pos != pgd_list.next) - set_bit(pgd_index(address), insync); - } - if (address == start && test_bit(pgd_index(address), insync)) - start = address + PGDIR_SIZE; - } -#endif -} diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c deleted file mode 100644 index eca28551b22d..000000000000 --- a/arch/tile/mm/highmem.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#include <linux/highmem.h> -#include <linux/module.h> -#include <linux/pagemap.h> -#include <asm/homecache.h> - -#define kmap_get_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)),\ - (vaddr)), (vaddr)) - - -void *kmap(struct page *page) -{ - void *kva; - unsigned long flags; - pte_t *ptep; - - might_sleep(); - if (!PageHighMem(page)) - return page_address(page); - kva = kmap_high(page); - - /* - * Rewrite the PTE under the lock. This ensures that the page - * is not currently migrating. - */ - ptep = kmap_get_pte((unsigned long)kva); - flags = homecache_kpte_lock(); - set_pte_at(&init_mm, kva, ptep, mk_pte(page, page_to_kpgprot(page))); - homecache_kpte_unlock(flags); - - return kva; -} -EXPORT_SYMBOL(kmap); - -void kunmap(struct page *page) -{ - if (in_interrupt()) - BUG(); - if (!PageHighMem(page)) - return; - kunmap_high(page); -} -EXPORT_SYMBOL(kunmap); - -/* - * Describe a single atomic mapping of a page on a given cpu at a - * given address, and allow it to be linked into a list. - */ -struct atomic_mapped_page { - struct list_head list; - struct page *page; - int cpu; - unsigned long va; -}; - -static spinlock_t amp_lock = __SPIN_LOCK_UNLOCKED(&_lock); -static struct list_head amp_list = LIST_HEAD_INIT(amp_list); - -/* - * Combining this structure with a per-cpu declaration lets us give - * each cpu an atomic_mapped_page structure per type. 
- */ -struct kmap_amps { - struct atomic_mapped_page per_type[KM_TYPE_NR]; -}; -static DEFINE_PER_CPU(struct kmap_amps, amps); - -/* - * Add a page and va, on this cpu, to the list of kmap_atomic pages, - * and write the new pte to memory. Writing the new PTE under the - * lock guarantees that it is either on the list before migration starts - * (if we won the race), or set_pte() sets the migrating bit in the PTE - * (if we lost the race). And doing it under the lock guarantees - * that when kmap_atomic_fix_one_pte() comes along, it finds a valid - * PTE in memory, iff the mapping is still on the amp_list. - * - * Finally, doing it under the lock lets us safely examine the page - * to see if it is immutable or not, for the generic kmap_atomic() case. - * If we examine it earlier we are exposed to a race where it looks - * writable earlier, but becomes immutable before we write the PTE. - */ -static void kmap_atomic_register(struct page *page, int type, - unsigned long va, pte_t *ptep, pte_t pteval) -{ - unsigned long flags; - struct atomic_mapped_page *amp; - - flags = homecache_kpte_lock(); - spin_lock(&_lock); - - /* With interrupts disabled, now fill in the per-cpu info. */ - amp = this_cpu_ptr(&s.per_type[type]); - amp->page = page; - amp->cpu = smp_processor_id(); - amp->va = va; - - /* For generic kmap_atomic(), choose the PTE writability now. */ - if (!pte_read(pteval)) - pteval = mk_pte(page, page_to_kpgprot(page)); - - list_add(&->list, &_list); - set_pte(ptep, pteval); - - spin_unlock(&_lock); - homecache_kpte_unlock(flags); -} - -/* - * Remove a page and va, on this cpu, from the list of kmap_atomic pages. - * Linear-time search, but we count on the lists being short. - * We don't need to adjust the PTE under the lock (as opposed to the - * kmap_atomic_register() case), since we're just unconditionally - * zeroing the PTE after it's off the list. 
- */ -static void kmap_atomic_unregister(struct page *page, unsigned long va) -{ - unsigned long flags; - struct atomic_mapped_page *amp; - int cpu = smp_processor_id(); - spin_lock_irqsave(&_lock, flags); - list_for_each_entry(amp, &_list, list) { - if (amp->page == page && amp->cpu == cpu && amp->va == va) - break; - } - BUG_ON(&->list == &_list); - list_del(&->list); - spin_unlock_irqrestore(&_lock, flags); -} - -/* Helper routine for kmap_atomic_fix_kpte(), below. */ -static void kmap_atomic_fix_one_kpte(struct atomic_mapped_page *amp, - int finished) -{ - pte_t *ptep = kmap_get_pte(amp->va); - if (!finished) { - set_pte(ptep, pte_mkmigrate(*ptep)); - flush_remote(0, 0, NULL, amp->va, PAGE_SIZE, PAGE_SIZE, - cpumask_of(amp->cpu), NULL, 0); - } else { - /* - * Rewrite a default kernel PTE for this page. - * We rely on the fact that set_pte() writes the - * present+migrating bits last. - */ - pte_t pte = mk_pte(amp->page, page_to_kpgprot(amp->page)); - set_pte(ptep, pte); - } -} - -/* - * This routine is a helper function for homecache_fix_kpte(); see - * its comments for more information on the "finished" argument here. - * - * Note that we hold the lock while doing the remote flushes, which - * will stall any unrelated cpus trying to do kmap_atomic operations. - * We could just update the PTEs under the lock, and save away copies - * of the structs (or just the va+cpu), then flush them after we - * release the lock, but it seems easier just to do it all under the lock. - */ -void kmap_atomic_fix_kpte(struct page *page, int finished) -{ - struct atomic_mapped_page *amp; - unsigned long flags; - spin_lock_irqsave(&_lock, flags); - list_for_each_entry(amp, &_list, list) { - if (amp->page == page) - kmap_atomic_fix_one_kpte(amp, finished); - } - spin_unlock_irqrestore(&_lock, flags); -} - -/* - * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap - * because the kmap code must perform a global TLB invalidation when - * the kmap pool wraps. 
- * - * Note that they may be slower than on x86 (etc.) because unlike on - * those platforms, we do have to take a global lock to map and unmap - * pages on Tile (see above). - * - * When holding an atomic kmap is is not legal to sleep, so atomic - * kmaps are appropriate for short, tight code paths only. - */ -void *kmap_atomic_prot(struct page *page, pgprot_t prot) -{ - unsigned long vaddr; - int idx, type; - pte_t *pte; - - preempt_disable(); - pagefault_disable(); - - /* Avoid icache flushes by disallowing atomic executable mappings. */ - BUG_ON(pte_exec(prot)); - - if (!PageHighMem(page)) - return page_address(page); - - type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR*smp_processor_id(); - vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); - pte = kmap_get_pte(vaddr); - BUG_ON(!pte_none(*pte)); - - /* Register that this page is mapped atomically on this cpu. */ - kmap_atomic_register(page, type, vaddr, pte, mk_pte(page, prot)); - - return (void *)vaddr; -} -EXPORT_SYMBOL(kmap_atomic_prot); - -void *kmap_atomic(struct page *page) -{ - /* PAGE_NONE is a magic value that tells us to check immutability. */ - return kmap_atomic_prot(page, PAGE_NONE); -} -EXPORT_SYMBOL(kmap_atomic); - -void __kunmap_atomic(void *kvaddr) -{ - unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - - if (vaddr >= __fix_to_virt(FIX_KMAP_END) && - vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { - pte_t *pte = kmap_get_pte(vaddr); - pte_t pteval = *pte; - int idx, type; - - type = kmap_atomic_idx(); - idx = type + KM_TYPE_NR*smp_processor_id(); - - /* - * Force other mappings to Oops if they try to access this pte - * without first remapping it. Keeping stale mappings around - * is a bad idea. 
- */ - BUG_ON(!pte_present(pteval) && !pte_migrating(pteval)); - kmap_atomic_unregister(pte_page(pteval), vaddr); - kpte_clear_flush(pte, vaddr); - kmap_atomic_idx_pop(); - } else { - /* Must be a lowmem page */ - BUG_ON(vaddr < PAGE_OFFSET); - BUG_ON(vaddr >= (unsigned long)high_memory); - } - - pagefault_enable(); - preempt_enable(); -} -EXPORT_SYMBOL(__kunmap_atomic); - -/* - * This API is supposed to allow us to map memory without a "struct page". - * Currently we don't support this, though this may change in the future. - */ -void *kmap_atomic_pfn(unsigned long pfn) -{ - return kmap_atomic(pfn_to_page(pfn)); -} -void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) -{ - return kmap_atomic_prot(pfn_to_page(pfn), prot); -} diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c deleted file mode 100644 index 4432f31e8479..000000000000 --- a/arch/tile/mm/homecache.c +++ /dev/null @@ -1,428 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * This code maintains the "home" for each page in the system. 
- */ - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/spinlock.h> -#include <linux/list.h> -#include <linux/bootmem.h> -#include <linux/rmap.h> -#include <linux/pagemap.h> -#include <linux/mutex.h> -#include <linux/interrupt.h> -#include <linux/sysctl.h> -#include <linux/pagevec.h> -#include <linux/ptrace.h> -#include <linux/timex.h> -#include <linux/cache.h> -#include <linux/smp.h> -#include <linux/module.h> -#include <linux/hugetlb.h> - -#include <asm/page.h> -#include <asm/sections.h> -#include <asm/tlbflush.h> -#include <asm/pgalloc.h> -#include <asm/homecache.h> - -#include <arch/sim.h> - -#include "migrate.h" - - -/* - * The noallocl2 option suppresses all use of the L2 cache to cache - * locally from a remote home. - */ -static int __ro_after_init noallocl2; -static int __init set_noallocl2(char *str) -{ - noallocl2 = 1; - return 0; -} -early_param("noallocl2", set_noallocl2); - - -/* - * Update the irq_stat for cpus that we are going to interrupt - * with TLB or cache flushes. Also handle removing dataplane cpus - * from the TLB flush set, and setting dataplane_tlb_state instead. - */ -static void hv_flush_update(const struct cpumask *cache_cpumask, - struct cpumask *tlb_cpumask, - unsigned long tlb_va, unsigned long tlb_length, - HV_Remote_ASID *asids, int asidcount) -{ - struct cpumask mask; - int i, cpu; - - cpumask_clear(&mask); - if (cache_cpumask) - cpumask_or(&mask, &mask, cache_cpumask); - if (tlb_cpumask && tlb_length) { - cpumask_or(&mask, &mask, tlb_cpumask); - } - - for (i = 0; i < asidcount; ++i) - cpumask_set_cpu(asids[i].y * smp_width + asids[i].x, &mask); - - /* - * Don't bother to update atomically; losing a count - * here is not that critical. 
- */ - for_each_cpu(cpu, &mask) - ++per_cpu(irq_stat, cpu).irq_hv_flush_count; -} - -/* - * This wrapper function around hv_flush_remote() does several things: - * - * - Provides a return value error-checking panic path, since - * there's never any good reason for hv_flush_remote() to fail. - * - Accepts a 32-bit PFN rather than a 64-bit PA, which generally - * is the type that Linux wants to pass around anyway. - * - Canonicalizes that lengths of zero make cpumasks NULL. - * - Handles deferring TLB flushes for dataplane tiles. - * - Tracks remote interrupts in the per-cpu irq_cpustat_t. - * - * Note that we have to wait until the cache flush completes before - * updating the per-cpu last_cache_flush word, since otherwise another - * concurrent flush can race, conclude the flush has already - * completed, and start to use the page while it's still dirty - * remotely (running concurrently with the actual evict, presumably). - */ -void flush_remote(unsigned long cache_pfn, unsigned long cache_control, - const struct cpumask *cache_cpumask_orig, - HV_VirtAddr tlb_va, unsigned long tlb_length, - unsigned long tlb_pgsize, - const struct cpumask *tlb_cpumask_orig, - HV_Remote_ASID *asids, int asidcount) -{ - int rc; - struct cpumask cache_cpumask_copy, tlb_cpumask_copy; - struct cpumask *cache_cpumask, *tlb_cpumask; - HV_PhysAddr cache_pa; - - mb(); /* provided just to simplify "magic hypervisor" mode */ - - /* - * Canonicalize and copy the cpumasks. 
- */ - if (cache_cpumask_orig && cache_control) { - cpumask_copy(&cache_cpumask_copy, cache_cpumask_orig); - cache_cpumask = &cache_cpumask_copy; - } else { - cpumask_clear(&cache_cpumask_copy); - cache_cpumask = NULL; - } - if (cache_cpumask == NULL) - cache_control = 0; - if (tlb_cpumask_orig && tlb_length) { - cpumask_copy(&tlb_cpumask_copy, tlb_cpumask_orig); - tlb_cpumask = &tlb_cpumask_copy; - } else { - cpumask_clear(&tlb_cpumask_copy); - tlb_cpumask = NULL; - } - - hv_flush_update(cache_cpumask, tlb_cpumask, tlb_va, tlb_length, - asids, asidcount); - cache_pa = (HV_PhysAddr)cache_pfn << PAGE_SHIFT; - rc = hv_flush_remote(cache_pa, cache_control, - cpumask_bits(cache_cpumask), - tlb_va, tlb_length, tlb_pgsize, - cpumask_bits(tlb_cpumask), - asids, asidcount); - if (rc == 0) - return; - - pr_err("hv_flush_remote(%#llx, %#lx, %p [%*pb], %#lx, %#lx, %#lx, %p [%*pb], %p, %d) = %d\n", - cache_pa, cache_control, cache_cpumask, - cpumask_pr_args(&cache_cpumask_copy), - (unsigned long)tlb_va, tlb_length, tlb_pgsize, tlb_cpumask, - cpumask_pr_args(&tlb_cpumask_copy), asids, asidcount, rc); - panic("Unsafe to continue."); -} - -static void homecache_finv_page_va(void* va, int home) -{ - int cpu = get_cpu(); - if (home == cpu) { - finv_buffer_local(va, PAGE_SIZE); - } else if (home == PAGE_HOME_HASH) { - finv_buffer_remote(va, PAGE_SIZE, 1); - } else { - BUG_ON(home < 0 || home >= NR_CPUS); - finv_buffer_remote(va, PAGE_SIZE, 0); - } - put_cpu(); -} - -void homecache_finv_map_page(struct page *page, int home) -{ - unsigned long flags; - unsigned long va; - pte_t *ptep; - pte_t pte; - - if (home == PAGE_HOME_UNCACHED) - return; - local_irq_save(flags); -#ifdef CONFIG_HIGHMEM - va = __fix_to_virt(FIX_KMAP_BEGIN + kmap_atomic_idx_push() + - (KM_TYPE_NR * smp_processor_id())); -#else - va = __fix_to_virt(FIX_HOMECACHE_BEGIN + smp_processor_id()); -#endif - ptep = virt_to_kpte(va); - pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL); - __set_pte(ptep, pte_set_home(pte, home)); 
- homecache_finv_page_va((void *)va, home); - __pte_clear(ptep); - hv_flush_page(va, PAGE_SIZE); -#ifdef CONFIG_HIGHMEM - kmap_atomic_idx_pop(); -#endif - local_irq_restore(flags); -} - -static void homecache_finv_page_home(struct page *page, int home) -{ - if (!PageHighMem(page) && home == page_home(page)) - homecache_finv_page_va(page_address(page), home); - else - homecache_finv_map_page(page, home); -} - -static inline bool incoherent_home(int home) -{ - return home == PAGE_HOME_IMMUTABLE || home == PAGE_HOME_INCOHERENT; -} - -static void homecache_finv_page_internal(struct page *page, int force_map) -{ - int home = page_home(page); - if (home == PAGE_HOME_UNCACHED) - return; - if (incoherent_home(home)) { - int cpu; - for_each_cpu(cpu, &cpu_cacheable_map) - homecache_finv_map_page(page, cpu); - } else if (force_map) { - /* Force if, e.g., the normal mapping is migrating. */ - homecache_finv_map_page(page, home); - } else { - homecache_finv_page_home(page, home); - } - sim_validate_lines_evicted(PFN_PHYS(page_to_pfn(page)), PAGE_SIZE); -} - -void homecache_finv_page(struct page *page) -{ - homecache_finv_page_internal(page, 0); -} - -void homecache_evict(const struct cpumask *mask) -{ - flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); -} - -/* Report the home corresponding to a given PTE. */ -static int pte_to_home(pte_t pte) -{ - if (hv_pte_get_nc(pte)) - return PAGE_HOME_IMMUTABLE; - switch (hv_pte_get_mode(pte)) { - case HV_PTE_MODE_CACHE_TILE_L3: - return get_remote_cache_cpu(pte); - case HV_PTE_MODE_CACHE_NO_L3: - return PAGE_HOME_INCOHERENT; - case HV_PTE_MODE_UNCACHED: - return PAGE_HOME_UNCACHED; - case HV_PTE_MODE_CACHE_HASH_L3: - return PAGE_HOME_HASH; - } - panic("Bad PTE %#llx\n", pte.val); -} - -/* Update the home of a PTE if necessary (can also be used for a pgprot_t). */ -pte_t pte_set_home(pte_t pte, int home) -{ -#if CHIP_HAS_MMIO() - /* Check for MMIO mappings and pass them through. 
*/ - if (hv_pte_get_mode(pte) == HV_PTE_MODE_MMIO) - return pte; -#endif - - - /* - * Only immutable pages get NC mappings. If we have a - * non-coherent PTE, but the underlying page is not - * immutable, it's likely the result of a forced - * caching setting running up against ptrace setting - * the page to be writable underneath. In this case, - * just keep the PTE coherent. - */ - if (hv_pte_get_nc(pte) && home != PAGE_HOME_IMMUTABLE) { - pte = hv_pte_clear_nc(pte); - pr_err("non-immutable page incoherently referenced: %#llx\n", - pte.val); - } - - switch (home) { - - case PAGE_HOME_UNCACHED: - pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); - break; - - case PAGE_HOME_INCOHERENT: - pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); - break; - - case PAGE_HOME_IMMUTABLE: - /* - * We could home this page anywhere, since it's immutable, - * but by default just home it to follow "hash_default". - */ - BUG_ON(hv_pte_get_writable(pte)); - if (pte_get_forcecache(pte)) { - /* Upgrade "force any cpu" to "No L3" for immutable. 
*/ - if (hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_TILE_L3 - && pte_get_anyhome(pte)) { - pte = hv_pte_set_mode(pte, - HV_PTE_MODE_CACHE_NO_L3); - } - } else - if (hash_default) - pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); - else - pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); - pte = hv_pte_set_nc(pte); - break; - - case PAGE_HOME_HASH: - pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); - break; - - default: - BUG_ON(home < 0 || home >= NR_CPUS || - !cpu_is_valid_lotar(home)); - pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_TILE_L3); - pte = set_remote_cache_cpu(pte, home); - break; - } - - if (noallocl2) - pte = hv_pte_set_no_alloc_l2(pte); - - /* Simplify "no local and no l3" to "uncached" */ - if (hv_pte_get_no_alloc_l2(pte) && hv_pte_get_no_alloc_l1(pte) && - hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) { - pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); - } - - /* Checking this case here gives a better panic than from the hv. */ - BUG_ON(hv_pte_get_mode(pte) == 0); - - return pte; -} -EXPORT_SYMBOL(pte_set_home); - -/* - * The routines in this section are the "static" versions of the normal - * dynamic homecaching routines; they just set the home cache - * of a kernel page once, and require a full-chip cache/TLB flush, - * so they're not suitable for anything but infrequent use. 
- */ - -int page_home(struct page *page) -{ - if (PageHighMem(page)) { - return PAGE_HOME_HASH; - } else { - unsigned long kva = (unsigned long)page_address(page); - return pte_to_home(*virt_to_kpte(kva)); - } -} -EXPORT_SYMBOL(page_home); - -void homecache_change_page_home(struct page *page, int order, int home) -{ - int i, pages = (1 << order); - unsigned long kva; - - BUG_ON(PageHighMem(page)); - BUG_ON(page_count(page) > 1); - BUG_ON(page_mapcount(page) != 0); - kva = (unsigned long) page_address(page); - flush_remote(0, HV_FLUSH_EVICT_L2, &cpu_cacheable_map, - kva, pages * PAGE_SIZE, PAGE_SIZE, cpu_online_mask, - NULL, 0); - - for (i = 0; i < pages; ++i, kva += PAGE_SIZE) { - pte_t *ptep = virt_to_kpte(kva); - pte_t pteval = *ptep; - BUG_ON(!pte_present(pteval) || pte_huge(pteval)); - __set_pte(ptep, pte_set_home(pteval, home)); - } -} -EXPORT_SYMBOL(homecache_change_page_home); - -struct page *homecache_alloc_pages(gfp_t gfp_mask, - unsigned int order, int home) -{ - struct page *page; - BUG_ON(gfp_mask & __GFP_HIGHMEM); /* must be lowmem */ - page = alloc_pages(gfp_mask, order); - if (page) - homecache_change_page_home(page, order, home); - return page; -} -EXPORT_SYMBOL(homecache_alloc_pages); - -struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask, - unsigned int order, int home) -{ - struct page *page; - BUG_ON(gfp_mask & __GFP_HIGHMEM); /* must be lowmem */ - page = alloc_pages_node(nid, gfp_mask, order); - if (page) - homecache_change_page_home(page, order, home); - return page; -} - -void __homecache_free_pages(struct page *page, unsigned int order) -{ - if (put_page_testzero(page)) { - homecache_change_page_home(page, order, PAGE_HOME_HASH); - if (order == 0) { - free_unref_page(page); - } else { - init_page_count(page); - __free_pages(page, order); - } - } -} -EXPORT_SYMBOL(__homecache_free_pages); - -void homecache_free_pages(unsigned long addr, unsigned int order) -{ - if (addr != 0) { - VM_BUG_ON(!virt_addr_valid((void *)addr)); - 
__homecache_free_pages(virt_to_page((void *)addr), order); - } -} -EXPORT_SYMBOL(homecache_free_pages); diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c deleted file mode 100644 index 0986d426a413..000000000000 --- a/arch/tile/mm/hugetlbpage.c +++ /dev/null @@ -1,348 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * TILE Huge TLB Page Support for Kernel. - * Taken from i386 hugetlb implementation: - * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> - */ - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/sched/mm.h> -#include <linux/hugetlb.h> -#include <linux/pagemap.h> -#include <linux/slab.h> -#include <linux/err.h> -#include <linux/sysctl.h> -#include <linux/mman.h> -#include <asm/tlb.h> -#include <asm/tlbflush.h> -#include <asm/setup.h> - -#ifdef CONFIG_HUGETLB_SUPER_PAGES - -/* - * Provide an additional huge page size (in addition to the regular default - * huge page size) if no "hugepagesz" arguments are specified. - * Note that it must be smaller than the default huge page size so - * that it's possible to allocate them on demand from the buddy allocator. - * You can change this to 64K (on a 16K build), 256K, 1M, or 4M, - * or not define it at all. - */ -#define ADDITIONAL_HUGE_SIZE (1024 * 1024UL) - -/* "Extra" page-size multipliers, one per level of the page table. 
*/ -int huge_shift[HUGE_SHIFT_ENTRIES] = { -#ifdef ADDITIONAL_HUGE_SIZE -#define ADDITIONAL_HUGE_SHIFT __builtin_ctzl(ADDITIONAL_HUGE_SIZE / PAGE_SIZE) - [HUGE_SHIFT_PAGE] = ADDITIONAL_HUGE_SHIFT -#endif -}; - -#endif - -pte_t *huge_pte_alloc(struct mm_struct *mm, - unsigned long addr, unsigned long sz) -{ - pgd_t *pgd; - pud_t *pud; - - addr &= -sz; /* Mask off any low bits in the address. */ - - pgd = pgd_offset(mm, addr); - pud = pud_alloc(mm, pgd, addr); - -#ifdef CONFIG_HUGETLB_SUPER_PAGES - if (sz >= PGDIR_SIZE) { - BUG_ON(sz != PGDIR_SIZE && - sz != PGDIR_SIZE << huge_shift[HUGE_SHIFT_PGDIR]); - return (pte_t *)pud; - } else { - pmd_t *pmd = pmd_alloc(mm, pud, addr); - if (sz >= PMD_SIZE) { - BUG_ON(sz != PMD_SIZE && - sz != (PMD_SIZE << huge_shift[HUGE_SHIFT_PMD])); - return (pte_t *)pmd; - } - else { - if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE]) - panic("Unexpected page size %#lx\n", sz); - return pte_alloc_map(mm, pmd, addr); - } - } -#else - BUG_ON(sz != PMD_SIZE); - return (pte_t *) pmd_alloc(mm, pud, addr); -#endif -} - -static pte_t *get_pte(pte_t *base, int index, int level) -{ - pte_t *ptep = base + index; -#ifdef CONFIG_HUGETLB_SUPER_PAGES - if (!pte_present(*ptep) && huge_shift[level] != 0) { - unsigned long mask = -1UL << huge_shift[level]; - pte_t *super_ptep = base + (index & mask); - pte_t pte = *super_ptep; - if (pte_present(pte) && pte_super(pte)) - ptep = super_ptep; - } -#endif - return ptep; -} - -pte_t *huge_pte_offset(struct mm_struct *mm, - unsigned long addr, unsigned long sz) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; -#ifdef CONFIG_HUGETLB_SUPER_PAGES - pte_t *pte; -#endif - - /* Get the top-level page table entry. */ - pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0); - - /* We don't have four levels. 
*/ - pud = pud_offset(pgd, addr); -#ifndef __PAGETABLE_PUD_FOLDED -# error support fourth page table level -#endif - if (!pud_present(*pud)) - return NULL; - - /* Check for an L0 huge PTE, if we have three levels. */ -#ifndef __PAGETABLE_PMD_FOLDED - if (pud_huge(*pud)) - return (pte_t *)pud; - - pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud), - pmd_index(addr), 1); - if (!pmd_present(*pmd)) - return NULL; -#else - pmd = pmd_offset(pud, addr); -#endif - - /* Check for an L1 huge PTE. */ - if (pmd_huge(*pmd)) - return (pte_t *)pmd; - -#ifdef CONFIG_HUGETLB_SUPER_PAGES - /* Check for an L2 huge PTE. */ - pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2); - if (!pte_present(*pte)) - return NULL; - if (pte_super(*pte)) - return pte; -#endif - - return NULL; -} - -int pmd_huge(pmd_t pmd) -{ - return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE); -} - -int pud_huge(pud_t pud) -{ - return !!(pud_val(pud) & _PAGE_HUGE_PAGE); -} - -#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA -static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, - unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct vm_unmapped_area_info info; - - info.flags = 0; - info.length = len; - info.low_limit = TASK_UNMAPPED_BASE; - info.high_limit = TASK_SIZE; - info.align_mask = PAGE_MASK & ~huge_page_mask(h); - info.align_offset = 0; - return vm_unmapped_area(&info); -} - -static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, - unsigned long addr0, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct vm_unmapped_area_info info; - unsigned long addr; - - info.flags = VM_UNMAPPED_AREA_TOPDOWN; - info.length = len; - info.low_limit = PAGE_SIZE; - info.high_limit = current->mm->mmap_base; - info.align_mask = PAGE_MASK & ~huge_page_mask(h); - info.align_offset = 0; - addr = vm_unmapped_area(&info); - - /* - * A failed mmap() very 
likely causes application failure, - * so fall back to the bottom-up function here. This scenario - * can happen with large stack limits and large mmap() - * allocations. - */ - if (addr & ~PAGE_MASK) { - VM_BUG_ON(addr != -ENOMEM); - info.flags = 0; - info.low_limit = TASK_UNMAPPED_BASE; - info.high_limit = TASK_SIZE; - addr = vm_unmapped_area(&info); - } - - return addr; -} - -unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - struct hstate *h = hstate_file(file); - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - - if (len & ~huge_page_mask(h)) - return -EINVAL; - if (len > TASK_SIZE) - return -ENOMEM; - - if (flags & MAP_FIXED) { - if (prepare_hugepage_range(file, addr, len)) - return -EINVAL; - return addr; - } - - if (addr) { - addr = ALIGN(addr, huge_page_size(h)); - vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vm_start_gap(vma))) - return addr; - } - if (current->mm->get_unmapped_area == arch_get_unmapped_area) - return hugetlb_get_unmapped_area_bottomup(file, addr, len, - pgoff, flags); - else - return hugetlb_get_unmapped_area_topdown(file, addr, len, - pgoff, flags); -} -#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ - -#ifdef CONFIG_HUGETLB_SUPER_PAGES -static __init int __setup_hugepagesz(unsigned long ps) -{ - int log_ps = __builtin_ctzl(ps); - int level, base_shift; - - if ((1UL << log_ps) != ps || (log_ps & 1) != 0) { - pr_warn("Not enabling %ld byte huge pages; must be a power of four\n", - ps); - return -EINVAL; - } - - if (ps > 64*1024*1024*1024UL) { - pr_warn("Not enabling %ld MB huge pages; largest legal value is 64 GB\n", - ps >> 20); - return -EINVAL; - } else if (ps >= PUD_SIZE) { - static long hv_jpage_size; - if (hv_jpage_size == 0) - hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO); - if (hv_jpage_size != PUD_SIZE) { - pr_warn("Not enabling >= %ld MB huge pages: hypervisor reports 
size %ld\n", - PUD_SIZE >> 20, hv_jpage_size); - return -EINVAL; - } - level = 0; - base_shift = PUD_SHIFT; - } else if (ps >= PMD_SIZE) { - level = 1; - base_shift = PMD_SHIFT; - } else if (ps > PAGE_SIZE) { - level = 2; - base_shift = PAGE_SHIFT; - } else { - pr_err("hugepagesz: huge page size %ld too small\n", ps); - return -EINVAL; - } - - if (log_ps != base_shift) { - int shift_val = log_ps - base_shift; - if (huge_shift[level] != 0) { - int old_shift = base_shift + huge_shift[level]; - pr_warn("Not enabling %ld MB huge pages; already have size %ld MB\n", - ps >> 20, (1UL << old_shift) >> 20); - return -EINVAL; - } - if (hv_set_pte_super_shift(level, shift_val) != 0) { - pr_warn("Not enabling %ld MB huge pages; no hypervisor support\n", - ps >> 20); - return -EINVAL; - } - printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20); - huge_shift[level] = shift_val; - } - - hugetlb_add_hstate(log_ps - PAGE_SHIFT); - - return 0; -} - -static bool saw_hugepagesz; - -static __init int setup_hugepagesz(char *opt) -{ - int rc; - - if (!saw_hugepagesz) { - saw_hugepagesz = true; - memset(huge_shift, 0, sizeof(huge_shift)); - } - rc = __setup_hugepagesz(memparse(opt, NULL)); - if (rc) - hugetlb_bad_size(); - return rc; -} -__setup("hugepagesz=", setup_hugepagesz); - -#ifdef ADDITIONAL_HUGE_SIZE -/* - * Provide an additional huge page size if no "hugepagesz" args are given. - * In that case, all the cores have properly set up their hv super_shift - * already, but we need to notify the hugetlb code to enable the - * new huge page size from the Linux point of view. 
- */ -static __init int add_default_hugepagesz(void) -{ - if (!saw_hugepagesz) { - BUILD_BUG_ON(ADDITIONAL_HUGE_SIZE >= PMD_SIZE || - ADDITIONAL_HUGE_SIZE <= PAGE_SIZE); - BUILD_BUG_ON((PAGE_SIZE << ADDITIONAL_HUGE_SHIFT) != - ADDITIONAL_HUGE_SIZE); - BUILD_BUG_ON(ADDITIONAL_HUGE_SHIFT & 1); - hugetlb_add_hstate(ADDITIONAL_HUGE_SHIFT); - } - return 0; -} -arch_initcall(add_default_hugepagesz); -#endif - -#endif /* CONFIG_HUGETLB_SUPER_PAGES */ diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c deleted file mode 100644 index 5f757e04bcd2..000000000000 --- a/arch/tile/mm/init.c +++ /dev/null @@ -1,956 +0,0 @@ -/* - * Copyright (C) 1995 Linus Torvalds - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. 
- */ - -#include <linux/module.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/mman.h> -#include <linux/mm.h> -#include <linux/hugetlb.h> -#include <linux/swap.h> -#include <linux/smp.h> -#include <linux/init.h> -#include <linux/highmem.h> -#include <linux/pagemap.h> -#include <linux/poison.h> -#include <linux/bootmem.h> -#include <linux/slab.h> -#include <linux/proc_fs.h> -#include <linux/efi.h> -#include <linux/memory_hotplug.h> -#include <linux/uaccess.h> -#include <asm/mmu_context.h> -#include <asm/processor.h> -#include <asm/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/dma.h> -#include <asm/fixmap.h> -#include <asm/tlb.h> -#include <asm/tlbflush.h> -#include <asm/sections.h> -#include <asm/setup.h> -#include <asm/homecache.h> -#include <hv/hypervisor.h> -#include <arch/chip.h> - -#include "migrate.h" - -#define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0)) - -#ifndef __tilegx__ -unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE; -EXPORT_SYMBOL(VMALLOC_RESERVE); -#endif - -/* Create an L2 page table */ -static pte_t * __init alloc_pte(void) -{ - return __alloc_bootmem(L2_KERNEL_PGTABLE_SIZE, HV_PAGE_TABLE_ALIGN, 0); -} - -/* - * L2 page tables per controller. We allocate these all at once from - * the bootmem allocator and store them here. This saves on kernel L2 - * page table memory, compared to allocating a full 64K page per L2 - * page table, and also means that in cases where we use huge pages, - * we are guaranteed to later be able to shatter those huge pages and - * switch to using these page tables instead, without requiring - * further allocation. Each l2_ptes[] entry points to the first page - * table for the first hugepage-size piece of memory on the - * controller; other page tables are just indexed directly, i.e. the - * L2 page tables are contiguous in memory for each controller. 
- */ -static pte_t *l2_ptes[MAX_NUMNODES]; -static int num_l2_ptes[MAX_NUMNODES]; - -static void init_prealloc_ptes(int node, int pages) -{ - BUG_ON(pages & (PTRS_PER_PTE - 1)); - if (pages) { - num_l2_ptes[node] = pages; - l2_ptes[node] = __alloc_bootmem(pages * sizeof(pte_t), - HV_PAGE_TABLE_ALIGN, 0); - } -} - -pte_t *get_prealloc_pte(unsigned long pfn) -{ - int node = pfn_to_nid(pfn); - pfn &= ~(-1UL << (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT)); - BUG_ON(node >= MAX_NUMNODES); - BUG_ON(pfn >= num_l2_ptes[node]); - return &l2_ptes[node][pfn]; -} - -/* - * What caching do we expect pages from the heap to have when - * they are allocated during bootup? (Once we've installed the - * "real" swapper_pg_dir.) - */ -static int initial_heap_home(void) -{ - if (hash_default) - return PAGE_HOME_HASH; - return smp_processor_id(); -} - -/* - * Place a pointer to an L2 page table in a middle page - * directory entry. - */ -static void __init assign_pte(pmd_t *pmd, pte_t *page_table) -{ - phys_addr_t pa = __pa(page_table); - unsigned long l2_ptfn = pa >> HV_LOG2_PAGE_TABLE_ALIGN; - pte_t pteval = hv_pte_set_ptfn(__pgprot(_PAGE_TABLE), l2_ptfn); - BUG_ON((pa & (HV_PAGE_TABLE_ALIGN-1)) != 0); - pteval = pte_set_home(pteval, initial_heap_home()); - *(pte_t *)pmd = pteval; - if (page_table != (pte_t *)pmd_page_vaddr(*pmd)) - BUG(); -} - -#ifdef __tilegx__ - -static inline pmd_t *alloc_pmd(void) -{ - return __alloc_bootmem(L1_KERNEL_PGTABLE_SIZE, HV_PAGE_TABLE_ALIGN, 0); -} - -static inline void assign_pmd(pud_t *pud, pmd_t *pmd) -{ - assign_pte((pmd_t *)pud, (pte_t *)pmd); -} - -#endif /* __tilegx__ */ - -/* Replace the given pmd with a full PTE table. 
*/ -void __init shatter_pmd(pmd_t *pmd) -{ - pte_t *pte = get_prealloc_pte(pte_pfn(*(pte_t *)pmd)); - assign_pte(pmd, pte); -} - -#ifdef __tilegx__ -static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) -{ - pud_t *pud = pud_offset(&pgtables[pgd_index(va)], va); - if (pud_none(*pud)) - assign_pmd(pud, alloc_pmd()); - return pmd_offset(pud, va); -} -#else -static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) -{ - return pmd_offset(pud_offset(&pgtables[pgd_index(va)], va), va); -} -#endif - -/* - * This function initializes a certain range of kernel virtual memory - * with new bootmem page tables, everywhere page tables are missing in - * the given range. - */ - -/* - * NOTE: The pagetables are allocated contiguous on the physical space - * so we can cache the place of the first one and move around without - * checking the pgd every time. - */ -static void __init page_table_range_init(unsigned long start, - unsigned long end, pgd_t *pgd) -{ - unsigned long vaddr; - start = round_down(start, PMD_SIZE); - end = round_up(end, PMD_SIZE); - for (vaddr = start; vaddr < end; vaddr += PMD_SIZE) { - pmd_t *pmd = get_pmd(pgd, vaddr); - if (pmd_none(*pmd)) - assign_pte(pmd, alloc_pte()); - } -} - - -static int __initdata ktext_hash = 1; /* .text pages */ -static int __initdata kdata_hash = 1; /* .data and .bss pages */ -int __ro_after_init hash_default = 1; /* kernel allocator pages */ -EXPORT_SYMBOL(hash_default); -int __ro_after_init kstack_hash = 1; /* if no homecaching, use h4h */ - -/* - * CPUs to use to for striping the pages of kernel data. If hash-for-home - * is available, this is only relevant if kcache_hash sets up the - * .data and .bss to be page-homed, and we don't want the default mode - * of using the full set of kernel cpus for the striping. 
- */ -static __initdata struct cpumask kdata_mask; -static __initdata int kdata_arg_seen; - -int __ro_after_init kdata_huge; /* if no homecaching, small pages */ - - -/* Combine a generic pgprot_t with cache home to get a cache-aware pgprot. */ -static pgprot_t __init construct_pgprot(pgprot_t prot, int home) -{ - prot = pte_set_home(prot, home); - if (home == PAGE_HOME_IMMUTABLE) { - if (ktext_hash) - prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3); - else - prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3); - } - return prot; -} - -/* - * For a given kernel data VA, how should it be cached? - * We return the complete pgprot_t with caching bits set. - */ -static pgprot_t __init init_pgprot(ulong address) -{ - int cpu; - unsigned long page; - enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET }; - - /* For kdata=huge, everything is just hash-for-home. */ - if (kdata_huge) - return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); - - /* - * We map the aliased pages of permanent text so we can - * update them if necessary, for ftrace, etc. - */ - if (address < (ulong) _sinittext - CODE_DELTA) - return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); - - /* We map read-only data non-coherent for performance. */ - if ((address >= (ulong) __start_rodata && - address < (ulong) __end_rodata) || - address == (ulong) empty_zero_page) { - return construct_pgprot(PAGE_KERNEL_RO, PAGE_HOME_IMMUTABLE); - } - -#ifndef __tilegx__ - /* Force the atomic_locks[] array page to be hash-for-home. */ - if (address == (ulong) atomic_locks) - return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); -#endif - - /* - * Everything else that isn't data or bss is heap, so mark it - * with the initial heap home (hash-for-home, or this cpu). This - * includes any addresses after the loaded image and any address before - * __init_end, since we already captured the case of text before - * _sinittext, and __pa(einittext) is approximately __pa(__init_begin). 
- * - * All the LOWMEM pages that we mark this way will get their - * struct page homecache properly marked later, in set_page_homes(). - * The HIGHMEM pages we leave with a default zero for their - * homes, but with a zero free_time we don't have to actually - * do a flush action the first time we use them, either. - */ - if (address >= (ulong) _end || address < (ulong) __init_end) - return construct_pgprot(PAGE_KERNEL, initial_heap_home()); - - /* Use hash-for-home if requested for data/bss. */ - if (kdata_hash) - return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); - - /* - * Otherwise we just hand out consecutive cpus. To avoid - * requiring this function to hold state, we just walk forward from - * __end_rodata by PAGE_SIZE, skipping the readonly and init data, to - * reach the requested address, while walking cpu home around - * kdata_mask. This is typically no more than a dozen or so iterations. - */ - page = (((ulong)__end_rodata) + PAGE_SIZE - 1) & PAGE_MASK; - BUG_ON(address < page || address >= (ulong)_end); - cpu = cpumask_first(&kdata_mask); - for (; page < address; page += PAGE_SIZE) { - if (page >= (ulong)&init_thread_union && - page < (ulong)&init_thread_union + THREAD_SIZE) - continue; - if (page == (ulong)empty_zero_page) - continue; -#ifndef __tilegx__ - if (page == (ulong)atomic_locks) - continue; -#endif - cpu = cpumask_next(cpu, &kdata_mask); - if (cpu == NR_CPUS) - cpu = cpumask_first(&kdata_mask); - } - return construct_pgprot(PAGE_KERNEL, cpu); -} - -/* - * This function sets up how we cache the kernel text. If we have - * hash-for-home support, normally that is used instead (see the - * kcache_hash boot flag for more information). But if we end up - * using a page-based caching technique, this option sets up the - * details of that. In addition, the "ktext=nocache" option may - * always be used to disable local caching of text pages, if desired. 
- */ - -static int __initdata ktext_arg_seen; -static int __initdata ktext_small; -static int __initdata ktext_local; -static int __initdata ktext_all; -static int __initdata ktext_nondataplane; -static int __initdata ktext_nocache; -static struct cpumask __initdata ktext_mask; - -static int __init setup_ktext(char *str) -{ - if (str == NULL) - return -EINVAL; - - /* If you have a leading "nocache", turn off ktext caching */ - if (strncmp(str, "nocache", 7) == 0) { - ktext_nocache = 1; - pr_info("ktext: disabling local caching of kernel text\n"); - str += 7; - if (*str == ',') - ++str; - if (*str == '\0') - return 0; - } - - ktext_arg_seen = 1; - - /* Default setting: use a huge page */ - if (strcmp(str, "huge") == 0) - pr_info("ktext: using one huge locally cached page\n"); - - /* Pay TLB cost but get no cache benefit: cache small pages locally */ - else if (strcmp(str, "local") == 0) { - ktext_small = 1; - ktext_local = 1; - pr_info("ktext: using small pages with local caching\n"); - } - - /* Neighborhood cache ktext pages on all cpus. */ - else if (strcmp(str, "all") == 0) { - ktext_small = 1; - ktext_all = 1; - pr_info("ktext: using maximal caching neighborhood\n"); - } - - - /* Neighborhood ktext pages on specified mask */ - else if (cpulist_parse(str, &ktext_mask) == 0) { - if (cpumask_weight(&ktext_mask) > 1) { - ktext_small = 1; - pr_info("ktext: using caching neighborhood %*pbl with small pages\n", - cpumask_pr_args(&ktext_mask)); - } else { - pr_info("ktext: caching on cpu %*pbl with one huge page\n", - cpumask_pr_args(&ktext_mask)); - } - } - - else if (*str) - return -EINVAL; - - return 0; -} - -early_param("ktext", setup_ktext); - - -static inline pgprot_t ktext_set_nocache(pgprot_t prot) -{ - if (!ktext_nocache) - prot = hv_pte_set_nc(prot); - else - prot = hv_pte_set_no_alloc_l2(prot); - return prot; -} - -/* Temporary page table we use for staging. 
*/ -static pgd_t pgtables[PTRS_PER_PGD] - __attribute__((aligned(HV_PAGE_TABLE_ALIGN))); - -/* - * This maps the physical memory to kernel virtual address space, a total - * of max_low_pfn pages, by creating page tables starting from address - * PAGE_OFFSET. - * - * This routine transitions us from using a set of compiled-in large - * pages to using some more precise caching, including removing access - * to code pages mapped at PAGE_OFFSET (executed only at MEM_SV_START) - * marking read-only data as locally cacheable, striping the remaining - * .data and .bss across all the available tiles, and removing access - * to pages above the top of RAM (thus ensuring a page fault from a bad - * virtual address rather than a hypervisor shoot down for accessing - * memory outside the assigned limits). - */ -static void __init kernel_physical_mapping_init(pgd_t *pgd_base) -{ - unsigned long long irqmask; - unsigned long address, pfn; - pmd_t *pmd; - pte_t *pte; - int pte_ofs; - const struct cpumask *my_cpu_mask = cpumask_of(smp_processor_id()); - struct cpumask kstripe_mask; - int rc, i; - - if (ktext_arg_seen && ktext_hash) { - pr_warn("warning: \"ktext\" boot argument ignored if \"kcache_hash\" sets up text hash-for-home\n"); - ktext_small = 0; - } - - if (kdata_arg_seen && kdata_hash) { - pr_warn("warning: \"kdata\" boot argument ignored if \"kcache_hash\" sets up data hash-for-home\n"); - } - - if (kdata_huge && !hash_default) { - pr_warn("warning: disabling \"kdata=huge\"; requires kcache_hash=all or =allbutstack\n"); - kdata_huge = 0; - } - - /* - * Set up a mask for cpus to use for kernel striping. - * This is normally all cpus, but minus dataplane cpus if any. - * If the dataplane covers the whole chip, we stripe over - * the whole chip too. 
- */ - cpumask_copy(&kstripe_mask, cpu_possible_mask); - if (!kdata_arg_seen) - kdata_mask = kstripe_mask; - - /* Allocate and fill in L2 page tables */ - for (i = 0; i < MAX_NUMNODES; ++i) { -#ifdef CONFIG_HIGHMEM - unsigned long end_pfn = node_lowmem_end_pfn[i]; -#else - unsigned long end_pfn = node_end_pfn[i]; -#endif - unsigned long end_huge_pfn = 0; - - /* Pre-shatter the last huge page to allow per-cpu pages. */ - if (kdata_huge) - end_huge_pfn = end_pfn - (HPAGE_SIZE >> PAGE_SHIFT); - - pfn = node_start_pfn[i]; - - /* Allocate enough memory to hold L2 page tables for node. */ - init_prealloc_ptes(i, end_pfn - pfn); - - address = (unsigned long) pfn_to_kaddr(pfn); - while (pfn < end_pfn) { - BUG_ON(address & (HPAGE_SIZE-1)); - pmd = get_pmd(pgtables, address); - pte = get_prealloc_pte(pfn); - if (pfn < end_huge_pfn) { - pgprot_t prot = init_pgprot(address); - *(pte_t *)pmd = pte_mkhuge(pfn_pte(pfn, prot)); - for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE; - pfn++, pte_ofs++, address += PAGE_SIZE) - pte[pte_ofs] = pfn_pte(pfn, prot); - } else { - if (kdata_huge) - printk(KERN_DEBUG "pre-shattered huge page at %#lx\n", - address); - for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE; - pfn++, pte_ofs++, address += PAGE_SIZE) { - pgprot_t prot = init_pgprot(address); - pte[pte_ofs] = pfn_pte(pfn, prot); - } - assign_pte(pmd, pte); - } - } - } - - /* - * Set or check ktext_map now that we have cpu_possible_mask - * and kstripe_mask to work with. 
- */ - if (ktext_all) - cpumask_copy(&ktext_mask, cpu_possible_mask); - else if (ktext_nondataplane) - ktext_mask = kstripe_mask; - else if (!cpumask_empty(&ktext_mask)) { - /* Sanity-check any mask that was requested */ - struct cpumask bad; - cpumask_andnot(&bad, &ktext_mask, cpu_possible_mask); - cpumask_and(&ktext_mask, &ktext_mask, cpu_possible_mask); - if (!cpumask_empty(&bad)) - pr_info("ktext: not using unavailable cpus %*pbl\n", - cpumask_pr_args(&bad)); - if (cpumask_empty(&ktext_mask)) { - pr_warn("ktext: no valid cpus; caching on %d\n", - smp_processor_id()); - cpumask_copy(&ktext_mask, - cpumask_of(smp_processor_id())); - } - } - - address = MEM_SV_START; - pmd = get_pmd(pgtables, address); - pfn = 0; /* code starts at PA 0 */ - if (ktext_small) { - /* Allocate an L2 PTE for the kernel text */ - int cpu = 0; - pgprot_t prot = construct_pgprot(PAGE_KERNEL_EXEC, - PAGE_HOME_IMMUTABLE); - - if (ktext_local) { - if (ktext_nocache) - prot = hv_pte_set_mode(prot, - HV_PTE_MODE_UNCACHED); - else - prot = hv_pte_set_mode(prot, - HV_PTE_MODE_CACHE_NO_L3); - } else { - prot = hv_pte_set_mode(prot, - HV_PTE_MODE_CACHE_TILE_L3); - cpu = cpumask_first(&ktext_mask); - - prot = ktext_set_nocache(prot); - } - - BUG_ON(address != (unsigned long)_text); - pte = NULL; - for (; address < (unsigned long)_einittext; - pfn++, address += PAGE_SIZE) { - pte_ofs = pte_index(address); - if (pte_ofs == 0) { - if (pte) - assign_pte(pmd++, pte); - pte = alloc_pte(); - } - if (!ktext_local) { - prot = set_remote_cache_cpu(prot, cpu); - cpu = cpumask_next(cpu, &ktext_mask); - if (cpu == NR_CPUS) - cpu = cpumask_first(&ktext_mask); - } - pte[pte_ofs] = pfn_pte(pfn, prot); - } - if (pte) - assign_pte(pmd, pte); - } else { - pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); - pteval = pte_mkhuge(pteval); - if (ktext_hash) { - pteval = hv_pte_set_mode(pteval, - HV_PTE_MODE_CACHE_HASH_L3); - pteval = ktext_set_nocache(pteval); - } else - if (cpumask_weight(&ktext_mask) == 1) { - pteval = 
set_remote_cache_cpu(pteval, - cpumask_first(&ktext_mask)); - pteval = hv_pte_set_mode(pteval, - HV_PTE_MODE_CACHE_TILE_L3); - pteval = ktext_set_nocache(pteval); - } else if (ktext_nocache) - pteval = hv_pte_set_mode(pteval, - HV_PTE_MODE_UNCACHED); - else - pteval = hv_pte_set_mode(pteval, - HV_PTE_MODE_CACHE_NO_L3); - for (; address < (unsigned long)_einittext; - pfn += PFN_DOWN(HPAGE_SIZE), address += HPAGE_SIZE) - *(pte_t *)(pmd++) = pfn_pte(pfn, pteval); - } - - /* Set swapper_pgprot here so it is flushed to memory right away. */ - swapper_pgprot = init_pgprot((unsigned long)swapper_pg_dir); - - /* - * Since we may be changing the caching of the stack and page - * table itself, we invoke an assembly helper to do the - * following steps: - * - * - flush the cache so we start with an empty slate - * - install pgtables[] as the real page table - * - flush the TLB so the new page table takes effect - */ - irqmask = interrupt_mask_save_mask(); - interrupt_mask_set_mask(-1ULL); - rc = flush_and_install_context(__pa(pgtables), - init_pgprot((unsigned long)pgtables), - __this_cpu_read(current_asid), - cpumask_bits(my_cpu_mask)); - interrupt_mask_restore_mask(irqmask); - BUG_ON(rc != 0); - - /* Copy the page table back to the normal swapper_pg_dir. */ - memcpy(pgd_base, pgtables, sizeof(pgtables)); - __install_page_table(pgd_base, __this_cpu_read(current_asid), - swapper_pgprot); - - /* - * We just read swapper_pgprot and thus brought it into the cache, - * with its new home & caching mode. When we start the other CPUs, - * they're going to reference swapper_pgprot via their initial fake - * VA-is-PA mappings, which cache everything locally. At that - * time, if it's in our cache with a conflicting home, the - * simulator's coherence checker will complain. So, flush it out - * of our cache; we're not going to ever use it again anyway. 
- */ - __insn_finv(&swapper_pgprot); -} - -/* - * devmem_is_allowed() checks to see if /dev/mem access to a certain address - * is valid. The argument is a physical page number. - * - * On Tile, the only valid things for which we can just hand out unchecked - * PTEs are the kernel code and data. Anything else might change its - * homing with time, and we wouldn't know to adjust the /dev/mem PTEs. - * Note that init_thread_union is released to heap soon after boot, - * so we include it in the init data. - * - * For TILE-Gx, we might want to consider allowing access to PA - * regions corresponding to PCI space, etc. - */ -int devmem_is_allowed(unsigned long pagenr) -{ - return pagenr < kaddr_to_pfn(_end) && - !(pagenr >= kaddr_to_pfn(&init_thread_union) || - pagenr < kaddr_to_pfn(__init_end)) && - !(pagenr >= kaddr_to_pfn(_sinittext) || - pagenr <= kaddr_to_pfn(_einittext-1)); -} - -#ifdef CONFIG_HIGHMEM -static void __init permanent_kmaps_init(pgd_t *pgd_base) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - unsigned long vaddr; - - vaddr = PKMAP_BASE; - page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); - - pgd = swapper_pg_dir + pgd_index(vaddr); - pud = pud_offset(pgd, vaddr); - pmd = pmd_offset(pud, vaddr); - pte = pte_offset_kernel(pmd, vaddr); - pkmap_page_table = pte; -} -#endif /* CONFIG_HIGHMEM */ - - -#ifndef CONFIG_64BIT -static void __init init_free_pfn_range(unsigned long start, unsigned long end) -{ - unsigned long pfn; - struct page *page = pfn_to_page(start); - - for (pfn = start; pfn < end; ) { - /* Optimize by freeing pages in large batches */ - int order = __ffs(pfn); - int count, i; - struct page *p; - - if (order >= MAX_ORDER) - order = MAX_ORDER-1; - count = 1 << order; - while (pfn + count > end) { - count >>= 1; - --order; - } - for (p = page, i = 0; i < count; ++i, ++p) { - __ClearPageReserved(p); - /* - * Hacky direct set to avoid unnecessary - * lock take/release for EVERY page here. 
- */ - p->_refcount.counter = 0; - p->_mapcount.counter = -1; - } - init_page_count(page); - __free_pages(page, order); - adjust_managed_page_count(page, count); - - page += count; - pfn += count; - } -} - -static void __init set_non_bootmem_pages_init(void) -{ - struct zone *z; - for_each_zone(z) { - unsigned long start, end; - int nid = z->zone_pgdat->node_id; -#ifdef CONFIG_HIGHMEM - int idx = zone_idx(z); -#endif - - start = z->zone_start_pfn; - end = start + z->spanned_pages; - start = max(start, node_free_pfn[nid]); - start = max(start, max_low_pfn); - -#ifdef CONFIG_HIGHMEM - if (idx == ZONE_HIGHMEM) - totalhigh_pages += z->spanned_pages; -#endif - if (kdata_huge) { - unsigned long percpu_pfn = node_percpu_pfn[nid]; - if (start < percpu_pfn && end > percpu_pfn) - end = percpu_pfn; - } -#ifdef CONFIG_PCI - if (start <= pci_reserve_start_pfn && - end > pci_reserve_start_pfn) { - if (end > pci_reserve_end_pfn) - init_free_pfn_range(pci_reserve_end_pfn, end); - end = pci_reserve_start_pfn; - } -#endif - init_free_pfn_range(start, end); - } -} -#endif - -/* - * paging_init() sets up the page tables - note that all of lowmem is - * already mapped by head.S. - */ -void __init paging_init(void) -{ -#ifdef __tilegx__ - pud_t *pud; -#endif - pgd_t *pgd_base = swapper_pg_dir; - - kernel_physical_mapping_init(pgd_base); - - /* Fixed mappings, only the page table structure has to be created. */ - page_table_range_init(fix_to_virt(__end_of_fixed_addresses - 1), - FIXADDR_TOP, pgd_base); - -#ifdef CONFIG_HIGHMEM - permanent_kmaps_init(pgd_base); -#endif - -#ifdef __tilegx__ - /* - * Since GX allocates just one pmd_t array worth of vmalloc space, - * we go ahead and allocate it statically here, then share it - * globally. As a result we don't have to worry about any task - * changing init_mm once we get up and running, and there's no - * need for e.g. vmalloc_sync_all(). 
- */ - BUILD_BUG_ON(pgd_index(VMALLOC_START) != pgd_index(VMALLOC_END - 1)); - pud = pud_offset(pgd_base + pgd_index(VMALLOC_START), VMALLOC_START); - assign_pmd(pud, alloc_pmd()); -#endif -} - - -/* - * Walk the kernel page tables and derive the page_home() from - * the PTEs, so that set_pte() can properly validate the caching - * of all PTEs it sees. - */ -void __init set_page_homes(void) -{ -} - -static void __init set_max_mapnr_init(void) -{ -#ifdef CONFIG_FLATMEM - max_mapnr = max_low_pfn; -#endif -} - -void __init mem_init(void) -{ - int i; -#ifndef __tilegx__ - void *last; -#endif - -#ifdef CONFIG_FLATMEM - BUG_ON(!mem_map); -#endif - -#ifdef CONFIG_HIGHMEM - /* check that fixmap and pkmap do not overlap */ - if (PKMAP_ADDR(LAST_PKMAP-1) >= FIXADDR_START) { - pr_err("fixmap and kmap areas overlap - this will crash\n"); - pr_err("pkstart: %lxh pkend: %lxh fixstart %lxh\n", - PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP-1), FIXADDR_START); - BUG(); - } -#endif - - set_max_mapnr_init(); - - /* this will put all bootmem onto the freelists */ - free_all_bootmem(); - -#ifndef CONFIG_64BIT - /* count all remaining LOWMEM and give all HIGHMEM to page allocator */ - set_non_bootmem_pages_init(); -#endif - - mem_init_print_info(NULL); - - /* - * In debug mode, dump some interesting memory mappings. 
- */ -#ifdef CONFIG_HIGHMEM - printk(KERN_DEBUG " KMAP %#lx - %#lx\n", - FIXADDR_START, FIXADDR_TOP + PAGE_SIZE - 1); - printk(KERN_DEBUG " PKMAP %#lx - %#lx\n", - PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1); -#endif - printk(KERN_DEBUG " VMALLOC %#lx - %#lx\n", - _VMALLOC_START, _VMALLOC_END - 1); -#ifdef __tilegx__ - for (i = MAX_NUMNODES-1; i >= 0; --i) { - struct pglist_data *node = &node_data[i]; - if (node->node_present_pages) { - unsigned long start = (unsigned long) - pfn_to_kaddr(node->node_start_pfn); - unsigned long end = start + - (node->node_present_pages << PAGE_SHIFT); - printk(KERN_DEBUG " MEM%d %#lx - %#lx\n", - i, start, end - 1); - } - } -#else - last = high_memory; - for (i = MAX_NUMNODES-1; i >= 0; --i) { - if ((unsigned long)vbase_map[i] != -1UL) { - printk(KERN_DEBUG " LOWMEM%d %#lx - %#lx\n", - i, (unsigned long) (vbase_map[i]), - (unsigned long) (last-1)); - last = vbase_map[i]; - } - } -#endif - -#ifndef __tilegx__ - /* - * Convert from using one lock for all atomic operations to - * one per cpu. - */ - __init_atomic_per_cpu(); -#endif -} - -struct kmem_cache *pgd_cache; - -void __init pgtable_cache_init(void) -{ - pgd_cache = kmem_cache_create("pgd", SIZEOF_PGD, SIZEOF_PGD, 0, NULL); - if (!pgd_cache) - panic("pgtable_cache_init(): Cannot create pgd cache"); -} - -static long __ro_after_init initfree = 1; -static bool __ro_after_init set_initfree_done; - -/* Select whether to free (1) or mark unusable (0) the __init pages. */ -static int __init set_initfree(char *str) -{ - long val; - if (kstrtol(str, 0, &val) == 0) { - set_initfree_done = true; - initfree = val; - pr_info("initfree: %s free init pages\n", - initfree ? 
"will" : "won't"); - } - return 1; -} -__setup("initfree=", set_initfree); - -static void free_init_pages(char *what, unsigned long begin, unsigned long end) -{ - unsigned long addr = (unsigned long) begin; - - /* Prefer user request first */ - if (!set_initfree_done) { - if (debug_pagealloc_enabled()) - initfree = 0; - } - if (kdata_huge && !initfree) { - pr_warn("Warning: ignoring initfree=0: incompatible with kdata=huge\n"); - initfree = 1; - } - end = (end + PAGE_SIZE - 1) & PAGE_MASK; - local_flush_tlb_pages(NULL, begin, PAGE_SIZE, end - begin); - for (addr = begin; addr < end; addr += PAGE_SIZE) { - /* - * Note we just reset the home here directly in the - * page table. We know this is safe because our caller - * just flushed the caches on all the other cpus, - * and they won't be touching any of these pages. - */ - int pfn = kaddr_to_pfn((void *)addr); - struct page *page = pfn_to_page(pfn); - pte_t *ptep = virt_to_kpte(addr); - if (!initfree) { - /* - * If debugging page accesses then do not free - * this memory but mark them not present - any - * buggy init-section access will create a - * kernel page fault: - */ - pte_clear(&init_mm, addr, ptep); - continue; - } - if (pte_huge(*ptep)) - BUG_ON(!kdata_huge); - else - set_pte_at(&init_mm, addr, ptep, - pfn_pte(pfn, PAGE_KERNEL)); - memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); - free_reserved_page(page); - } - pr_info("Freeing %s: %ldk freed\n", what, (end - begin) >> 10); -} - -void free_initmem(void) -{ - const unsigned long text_delta = MEM_SV_START - PAGE_OFFSET; - - /* - * Evict the cache on all cores to avoid incoherence. - * We are guaranteed that no one will touch the init pages any more. - */ - homecache_evict(&cpu_cacheable_map); - - /* Free the data pages that we won't use again after init. 
*/ - free_init_pages("unused kernel data", - (unsigned long)__init_begin, - (unsigned long)__init_end); - - /* - * Free the pages mapped from 0xc0000000 that correspond to code - * pages from MEM_SV_START that we won't use again after init. - */ - free_init_pages("unused kernel text", - (unsigned long)_sinittext - text_delta, - (unsigned long)_einittext - text_delta); - /* Do a global TLB flush so everyone sees the changes. */ - flush_tlb_all(); -} diff --git a/arch/tile/mm/migrate.h b/arch/tile/mm/migrate.h deleted file mode 100644 index 91683d97917e..000000000000 --- a/arch/tile/mm/migrate.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * Structure definitions for migration, exposed here for use by - * arch/tile/kernel/asm-offsets.c. - */ - -#ifndef MM_MIGRATE_H -#define MM_MIGRATE_H - -#include <linux/cpumask.h> -#include <hv/hypervisor.h> - -/* - * This function is used as a helper when setting up the initial - * page table (swapper_pg_dir). - * - * You must mask ALL interrupts prior to invoking this code, since - * you can't legally touch the stack during the cache flush. - */ -extern int flush_and_install_context(HV_PhysAddr page_table, HV_PTE access, - HV_ASID asid, - const unsigned long *cpumask); - -/* - * This function supports migration as a "helper" as follows: - * - * - Set the stack PTE itself to "migrating". - * - Do a global TLB flush for (va,length) and the specified ASIDs. - * - Do a cache-evict on all necessary cpus. 
- * - Write the new stack PTE. - * - * Note that any non-NULL pointers must not point to the page that - * is handled by the stack_pte itself. - * - * You must mask ALL interrupts prior to invoking this code, since - * you can't legally touch the stack during the cache flush. - */ -extern int homecache_migrate_stack_and_flush(pte_t stack_pte, unsigned long va, - size_t length, pte_t *stack_ptep, - const struct cpumask *cache_cpumask, - const struct cpumask *tlb_cpumask, - HV_Remote_ASID *asids, - int asidcount); - -#endif /* MM_MIGRATE_H */ diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S deleted file mode 100644 index 772085491bf9..000000000000 --- a/arch/tile/mm/migrate_32.S +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * This routine is a helper for migrating the home of a set of pages to - * a new cpu. See the documentation in homecache.c for more information. - */ - -#include <linux/linkage.h> -#include <linux/threads.h> -#include <asm/page.h> -#include <asm/thread_info.h> -#include <asm/types.h> -#include <asm/asm-offsets.h> -#include <hv/hypervisor.h> - - .text - -/* - * First, some definitions that apply to all the code in the file. - */ - -/* Locals (caller-save) */ -#define r_tmp r10 -#define r_save_sp r11 - -/* What we save where in the stack frame; must include all callee-saves. 
*/ -#define FRAME_SP 4 -#define FRAME_R30 8 -#define FRAME_R31 12 -#define FRAME_R32 16 -#define FRAME_R33 20 -#define FRAME_R34 24 -#define FRAME_SIZE 28 - - - - -/* - * On entry: - * - * r0 low word of the new context PA to install (moved to r_context_lo) - * r1 high word of the new context PA to install (moved to r_context_hi) - * r2 low word of PTE to use for context access (moved to r_access_lo) - * r3 high word of PTE to use for context access (moved to r_access_lo) - * r4 ASID to use for new context (moved to r_asid) - * r5 pointer to cpumask with just this cpu set in it (r_my_cpumask) - */ - -/* Arguments (caller-save) */ -#define r_context_lo_in r0 -#define r_context_hi_in r1 -#define r_access_lo_in r2 -#define r_access_hi_in r3 -#define r_asid_in r4 -#define r_my_cpumask r5 - -/* Locals (callee-save); must not be more than FRAME_xxx above. */ -#define r_context_lo r30 -#define r_context_hi r31 -#define r_access_lo r32 -#define r_access_hi r33 -#define r_asid r34 - -STD_ENTRY(flush_and_install_context) - /* - * Create a stack frame; we can't touch it once we flush the - * cache until we install the new page table and flush the TLB. - */ - { - move r_save_sp, sp - sw sp, lr - addi sp, sp, -FRAME_SIZE - } - addi r_tmp, sp, FRAME_SP - { - sw r_tmp, r_save_sp - addi r_tmp, sp, FRAME_R30 - } - { - sw r_tmp, r30 - addi r_tmp, sp, FRAME_R31 - } - { - sw r_tmp, r31 - addi r_tmp, sp, FRAME_R32 - } - { - sw r_tmp, r32 - addi r_tmp, sp, FRAME_R33 - } - { - sw r_tmp, r33 - addi r_tmp, sp, FRAME_R34 - } - sw r_tmp, r34 - - /* Move some arguments to callee-save registers. */ - { - move r_context_lo, r_context_lo_in - move r_context_hi, r_context_hi_in - } - { - move r_access_lo, r_access_lo_in - move r_access_hi, r_access_hi_in - } - move r_asid, r_asid_in - - /* First, flush our L2 cache. 
*/ - { - move r0, zero /* cache_pa */ - move r1, zero - } - { - auli r2, zero, ha16(HV_FLUSH_EVICT_L2) /* cache_control */ - move r3, r_my_cpumask /* cache_cpumask */ - } - { - move r4, zero /* tlb_va */ - move r5, zero /* tlb_length */ - } - { - move r6, zero /* tlb_pgsize */ - move r7, zero /* tlb_cpumask */ - } - { - move r8, zero /* asids */ - move r9, zero /* asidcount */ - } - jal _hv_flush_remote - bnz r0, .Ldone - - /* Now install the new page table. */ - { - move r0, r_context_lo - move r1, r_context_hi - } - { - move r2, r_access_lo - move r3, r_access_hi - } - { - move r4, r_asid - moveli r5, HV_CTX_DIRECTIO | CTX_PAGE_FLAG - } - jal _hv_install_context - bnz r0, .Ldone - - /* Finally, flush the TLB. */ - { - movei r0, 0 /* preserve_global */ - jal hv_flush_all - } - -.Ldone: - /* Restore the callee-saved registers and return. */ - addli lr, sp, FRAME_SIZE - { - lw lr, lr - addli r_tmp, sp, FRAME_R30 - } - { - lw r30, r_tmp - addli r_tmp, sp, FRAME_R31 - } - { - lw r31, r_tmp - addli r_tmp, sp, FRAME_R32 - } - { - lw r32, r_tmp - addli r_tmp, sp, FRAME_R33 - } - { - lw r33, r_tmp - addli r_tmp, sp, FRAME_R34 - } - { - lw r34, r_tmp - addi sp, sp, FRAME_SIZE - } - jrp lr - STD_ENDPROC(flush_and_install_context) diff --git a/arch/tile/mm/migrate_64.S b/arch/tile/mm/migrate_64.S deleted file mode 100644 index a49eee38f872..000000000000 --- a/arch/tile/mm/migrate_64.S +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright 2011 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. 
- * - * This routine is a helper for migrating the home of a set of pages to - * a new cpu. See the documentation in homecache.c for more information. - */ - -#include <linux/linkage.h> -#include <linux/threads.h> -#include <asm/page.h> -#include <asm/thread_info.h> -#include <asm/types.h> -#include <asm/asm-offsets.h> -#include <hv/hypervisor.h> - - .text - -/* - * First, some definitions that apply to all the code in the file. - */ - -/* Locals (caller-save) */ -#define r_tmp r10 -#define r_save_sp r11 - -/* What we save where in the stack frame; must include all callee-saves. */ -#define FRAME_SP 8 -#define FRAME_R30 16 -#define FRAME_R31 24 -#define FRAME_R32 32 -#define FRAME_SIZE 40 - - - - -/* - * On entry: - * - * r0 the new context PA to install (moved to r_context) - * r1 PTE to use for context access (moved to r_access) - * r2 ASID to use for new context (moved to r_asid) - * r3 pointer to cpumask with just this cpu set in it (r_my_cpumask) - */ - -/* Arguments (caller-save) */ -#define r_context_in r0 -#define r_access_in r1 -#define r_asid_in r2 -#define r_my_cpumask r3 - -/* Locals (callee-save); must not be more than FRAME_xxx above. */ -#define r_context r30 -#define r_access r31 -#define r_asid r32 - -/* - * Caller-save locals and frame constants are the same as - * for homecache_migrate_stack_and_flush. - */ - -STD_ENTRY(flush_and_install_context) - /* - * Create a stack frame; we can't touch it once we flush the - * cache until we install the new page table and flush the TLB. - */ - { - move r_save_sp, sp - st sp, lr - addi sp, sp, -FRAME_SIZE - } - addi r_tmp, sp, FRAME_SP - { - st r_tmp, r_save_sp - addi r_tmp, sp, FRAME_R30 - } - { - st r_tmp, r30 - addi r_tmp, sp, FRAME_R31 - } - { - st r_tmp, r31 - addi r_tmp, sp, FRAME_R32 - } - st r_tmp, r32 - - /* Move some arguments to callee-save registers. */ - { - move r_context, r_context_in - move r_access, r_access_in - } - move r_asid, r_asid_in - - /* First, flush our L2 cache. 
*/ - { - move r0, zero /* cache_pa */ - moveli r1, hw2_last(HV_FLUSH_EVICT_L2) /* cache_control */ - } - { - shl16insli r1, r1, hw1(HV_FLUSH_EVICT_L2) - move r2, r_my_cpumask /* cache_cpumask */ - } - { - shl16insli r1, r1, hw0(HV_FLUSH_EVICT_L2) - move r3, zero /* tlb_va */ - } - { - move r4, zero /* tlb_length */ - move r5, zero /* tlb_pgsize */ - } - { - move r6, zero /* tlb_cpumask */ - move r7, zero /* asids */ - } - { - move r8, zero /* asidcount */ - jal _hv_flush_remote - } - bnez r0, 1f - - /* Now install the new page table. */ - { - move r0, r_context - move r1, r_access - } - { - move r2, r_asid - moveli r3, HV_CTX_DIRECTIO | CTX_PAGE_FLAG - } - jal _hv_install_context - bnez r0, 1f - - /* Finally, flush the TLB. */ - { - movei r0, 0 /* preserve_global */ - jal hv_flush_all - } - -1: /* Restore the callee-saved registers and return. */ - addli lr, sp, FRAME_SIZE - { - ld lr, lr - addli r_tmp, sp, FRAME_R30 - } - { - ld r30, r_tmp - addli r_tmp, sp, FRAME_R31 - } - { - ld r31, r_tmp - addli r_tmp, sp, FRAME_R32 - } - { - ld r32, r_tmp - addi sp, sp, FRAME_SIZE - } - jrp lr - STD_ENDPROC(flush_and_install_context) diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c deleted file mode 100644 index 8ab28167c44b..000000000000 --- a/arch/tile/mm/mmap.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - * - * Taken from the i386 architecture and simplified. 
- */ - -#include <linux/mm.h> -#include <linux/random.h> -#include <linux/limits.h> -#include <linux/sched/signal.h> -#include <linux/sched/mm.h> -#include <linux/mman.h> -#include <linux/compat.h> - -/* - * Top of mmap area (just below the process stack). - * - * Leave an at least ~128 MB hole. - */ -#define MIN_GAP (128*1024*1024) -#define MAX_GAP (TASK_SIZE/6*5) - -static inline unsigned long mmap_base(struct mm_struct *mm) -{ - unsigned long gap = rlimit(RLIMIT_STACK); - unsigned long random_factor = 0; - - if (current->flags & PF_RANDOMIZE) - random_factor = get_random_int() % (1024*1024); - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(TASK_SIZE - gap - random_factor); -} - -/* - * This function, called very early during the creation of a new - * process VM image, sets up which VM layout function to use: - */ -void arch_pick_mmap_layout(struct mm_struct *mm) -{ -#if !defined(__tilegx__) - int is_32bit = 1; -#elif defined(CONFIG_COMPAT) - int is_32bit = is_compat_task(); -#else - int is_32bit = 0; -#endif - unsigned long random_factor = 0UL; - - /* - * 8 bits of randomness in 32bit mmaps, 24 address space bits - * 12 bits of randomness in 64bit mmaps, 28 address space bits - */ - if (current->flags & PF_RANDOMIZE) { - if (is_32bit) - random_factor = get_random_int() % (1<<8); - else - random_factor = get_random_int() % (1<<12); - - random_factor <<= PAGE_SHIFT; - } - - /* - * Use standard layout if the expected stack growth is unlimited - * or we are running native 64 bits. 
- */ - if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) { - mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; - mm->get_unmapped_area = arch_get_unmapped_area; - } else { - mm->mmap_base = mmap_base(mm); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - } -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - return randomize_page(mm->brk, 0x02000000); -} diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c deleted file mode 100644 index ec5576fd3a86..000000000000 --- a/arch/tile/mm/pgtable.c +++ /dev/null @@ -1,550 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/mm.h> -#include <linux/swap.h> -#include <linux/highmem.h> -#include <linux/slab.h> -#include <linux/pagemap.h> -#include <linux/spinlock.h> -#include <linux/cpumask.h> -#include <linux/module.h> -#include <linux/io.h> -#include <linux/vmalloc.h> -#include <linux/smp.h> - -#include <asm/pgtable.h> -#include <asm/pgalloc.h> -#include <asm/fixmap.h> -#include <asm/tlb.h> -#include <asm/tlbflush.h> -#include <asm/homecache.h> - -#define K(x) ((x) << (PAGE_SHIFT-10)) - -/** - * shatter_huge_page() - ensure a given address is mapped by a small page. - * - * This function converts a huge PTE mapping kernel LOWMEM into a bunch - * of small PTEs with the same caching. No cache flush required, but we - * must do a global TLB flush. 
- * - * Any caller that wishes to modify a kernel mapping that might - * have been made with a huge page should call this function, - * since doing so properly avoids race conditions with installing the - * newly-shattered page and then flushing all the TLB entries. - * - * @addr: Address at which to shatter any existing huge page. - */ -void shatter_huge_page(unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - unsigned long flags = 0; /* happy compiler */ -#ifdef __PAGETABLE_PMD_FOLDED - struct list_head *pos; -#endif - - /* Get a pointer to the pmd entry that we need to change. */ - addr &= HPAGE_MASK; - BUG_ON(pgd_addr_invalid(addr)); - BUG_ON(addr < PAGE_OFFSET); /* only for kernel LOWMEM */ - pgd = swapper_pg_dir + pgd_index(addr); - pud = pud_offset(pgd, addr); - BUG_ON(!pud_present(*pud)); - pmd = pmd_offset(pud, addr); - BUG_ON(!pmd_present(*pmd)); - if (!pmd_huge_page(*pmd)) - return; - - spin_lock_irqsave(&init_mm.page_table_lock, flags); - if (!pmd_huge_page(*pmd)) { - /* Lost the race to convert the huge page. */ - spin_unlock_irqrestore(&init_mm.page_table_lock, flags); - return; - } - - /* Shatter the huge page into the preallocated L2 page table. */ - pmd_populate_kernel(&init_mm, pmd, get_prealloc_pte(pmd_pfn(*pmd))); - -#ifdef __PAGETABLE_PMD_FOLDED - /* Walk every pgd on the system and update the pmd there. */ - spin_lock(&pgd_lock); - list_for_each(pos, &pgd_list) { - pmd_t *copy_pmd; - pgd = list_to_pgd(pos) + pgd_index(addr); - pud = pud_offset(pgd, addr); - copy_pmd = pmd_offset(pud, addr); - __set_pmd(copy_pmd, *pmd); - } - spin_unlock(&pgd_lock); -#endif - - /* Tell every cpu to notice the change. */ - flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE, - cpu_possible_mask, NULL, 0); - - /* Hold the lock until the TLB flush is finished to avoid races. */ - spin_unlock_irqrestore(&init_mm.page_table_lock, flags); -} - -/* - * List of all pgd's needed so it can invalidate entries in both cached - * and uncached pgd's. 
This is essentially codepath-based locking - * against pageattr.c; it is the unique case in which a valid change - * of kernel pagetables can't be lazily synchronized by vmalloc faults. - * vmalloc faults work because attached pagetables are never freed. - * - * The lock is always taken with interrupts disabled, unlike on x86 - * and other platforms, because we need to take the lock in - * shatter_huge_page(), which may be called from an interrupt context. - * We are not at risk from the tlbflush IPI deadlock that was seen on - * x86, since we use the flush_remote() API to have the hypervisor do - * the TLB flushes regardless of irq disabling. - */ -DEFINE_SPINLOCK(pgd_lock); -LIST_HEAD(pgd_list); - -static inline void pgd_list_add(pgd_t *pgd) -{ - list_add(pgd_to_list(pgd), &pgd_list); -} - -static inline void pgd_list_del(pgd_t *pgd) -{ - list_del(pgd_to_list(pgd)); -} - -#define KERNEL_PGD_INDEX_START pgd_index(PAGE_OFFSET) -#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_INDEX_START) - -static void pgd_ctor(pgd_t *pgd) -{ - unsigned long flags; - - memset(pgd, 0, KERNEL_PGD_INDEX_START*sizeof(pgd_t)); - spin_lock_irqsave(&pgd_lock, flags); - -#ifndef __tilegx__ - /* - * Check that the user interrupt vector has no L2. - * It never should for the swapper, and new page tables - * should always start with an empty user interrupt vector. 
- */ - BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0); -#endif - - memcpy(pgd + KERNEL_PGD_INDEX_START, - swapper_pg_dir + KERNEL_PGD_INDEX_START, - KERNEL_PGD_PTRS * sizeof(pgd_t)); - - pgd_list_add(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); -} - -static void pgd_dtor(pgd_t *pgd) -{ - unsigned long flags; /* can be called from interrupt context */ - - spin_lock_irqsave(&pgd_lock, flags); - pgd_list_del(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); -} - -pgd_t *pgd_alloc(struct mm_struct *mm) -{ - pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL); - if (pgd) - pgd_ctor(pgd); - return pgd; -} - -void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - pgd_dtor(pgd); - kmem_cache_free(pgd_cache, pgd); -} - - -#define L2_USER_PGTABLE_PAGES (1 << L2_USER_PGTABLE_ORDER) - -struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, - int order) -{ - gfp_t flags = GFP_KERNEL|__GFP_ZERO; - struct page *p; - int i; - - p = alloc_pages(flags, L2_USER_PGTABLE_ORDER); - if (p == NULL) - return NULL; - - if (!pgtable_page_ctor(p)) { - __free_pages(p, L2_USER_PGTABLE_ORDER); - return NULL; - } - - /* - * Make every page have a page_count() of one, not just the first. - * We don't use __GFP_COMP since it doesn't look like it works - * correctly with tlb_remove_page(). - */ - for (i = 1; i < order; ++i) { - init_page_count(p+i); - inc_zone_page_state(p+i, NR_PAGETABLE); - } - - return p; -} - -/* - * Free page immediately (used in __pte_alloc if we raced with another - * process). We have to correct whatever pte_alloc_one() did before - * returning the pages to the allocator. 
- */ -void pgtable_free(struct mm_struct *mm, struct page *p, int order) -{ - int i; - - pgtable_page_dtor(p); - __free_page(p); - - for (i = 1; i < order; ++i) { - __free_page(p+i); - dec_zone_page_state(p+i, NR_PAGETABLE); - } -} - -void __pgtable_free_tlb(struct mmu_gather *tlb, struct page *pte, - unsigned long address, int order) -{ - int i; - - pgtable_page_dtor(pte); - tlb_remove_page(tlb, pte); - - for (i = 1; i < order; ++i) { - tlb_remove_page(tlb, pte + i); - dec_zone_page_state(pte + i, NR_PAGETABLE); - } -} - -#ifndef __tilegx__ - -/* - * FIXME: needs to be atomic vs hypervisor writes. For now we make the - * window of vulnerability a bit smaller by doing an unlocked 8-bit update. - */ -int ptep_test_and_clear_young(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ -#if HV_PTE_INDEX_ACCESSED < 8 || HV_PTE_INDEX_ACCESSED >= 16 -# error Code assumes HV_PTE "accessed" bit in second byte -#endif - u8 *tmp = (u8 *)ptep; - u8 second_byte = tmp[1]; - if (!(second_byte & (1 << (HV_PTE_INDEX_ACCESSED - 8)))) - return 0; - tmp[1] = second_byte & ~(1 << (HV_PTE_INDEX_ACCESSED - 8)); - return 1; -} - -/* - * This implementation is atomic vs hypervisor writes, since the hypervisor - * always writes the low word (where "accessed" and "dirty" are) and this - * routine only writes the high word. - */ -void ptep_set_wrprotect(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ -#if HV_PTE_INDEX_WRITABLE < 32 -# error Code assumes HV_PTE "writable" bit in high word -#endif - u32 *tmp = (u32 *)ptep; - tmp[1] = tmp[1] & ~(1 << (HV_PTE_INDEX_WRITABLE - 32)); -} - -#endif - -/* - * Return a pointer to the PTE that corresponds to the given - * address in the given page table. A NULL page table just uses - * the standard kernel page table; the preferred API in this case - * is virt_to_kpte(). - * - * The returned pointer can point to a huge page in other levels - * of the page table than the bottom, if the huge page is present - * in the page table. 
For bottom-level PTEs, the returned pointer - * can point to a PTE that is either present or not. - */ -pte_t *virt_to_pte(struct mm_struct* mm, unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - - if (pgd_addr_invalid(addr)) - return NULL; - - pgd = mm ? pgd_offset(mm, addr) : swapper_pg_dir + pgd_index(addr); - pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) - return NULL; - if (pud_huge_page(*pud)) - return (pte_t *)pud; - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) - return NULL; - if (pmd_huge_page(*pmd)) - return (pte_t *)pmd; - return pte_offset_kernel(pmd, addr); -} -EXPORT_SYMBOL(virt_to_pte); - -pte_t *virt_to_kpte(unsigned long kaddr) -{ - BUG_ON(kaddr < PAGE_OFFSET); - return virt_to_pte(NULL, kaddr); -} -EXPORT_SYMBOL(virt_to_kpte); - -pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu) -{ - unsigned int width = smp_width; - int x = cpu % width; - int y = cpu / width; - BUG_ON(y >= smp_height); - BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3); - BUG_ON(cpu < 0 || cpu >= NR_CPUS); - BUG_ON(!cpu_is_valid_lotar(cpu)); - return hv_pte_set_lotar(prot, HV_XY_TO_LOTAR(x, y)); -} - -int get_remote_cache_cpu(pgprot_t prot) -{ - HV_LOTAR lotar = hv_pte_get_lotar(prot); - int x = HV_LOTAR_X(lotar); - int y = HV_LOTAR_Y(lotar); - BUG_ON(hv_pte_get_mode(prot) != HV_PTE_MODE_CACHE_TILE_L3); - return x + y * smp_width; -} - -/* - * Convert a kernel VA to a PA and homing information. - */ -int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte) -{ - struct page *page = virt_to_page(va); - pte_t null_pte = { 0 }; - - *cpa = __pa(va); - - /* Note that this is not writing a page table, just returning a pte. */ - *pte = pte_set_home(null_pte, page_home(page)); - - return 0; /* return non-zero if not hfh? 
*/ -} -EXPORT_SYMBOL(va_to_cpa_and_pte); - -void __set_pte(pte_t *ptep, pte_t pte) -{ -#ifdef __tilegx__ - *ptep = pte; -#else -# if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 -# error Must write the present and migrating bits last -# endif - if (pte_present(pte)) { - ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); - barrier(); - ((u32 *)ptep)[0] = (u32)(pte_val(pte)); - } else { - ((u32 *)ptep)[0] = (u32)(pte_val(pte)); - barrier(); - ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); - } -#endif /* __tilegx__ */ -} - -void set_pte(pte_t *ptep, pte_t pte) -{ - if (pte_present(pte) && - (!CHIP_HAS_MMIO() || hv_pte_get_mode(pte) != HV_PTE_MODE_MMIO)) { - /* The PTE actually references physical memory. */ - unsigned long pfn = pte_pfn(pte); - if (pfn_valid(pfn)) { - /* Update the home of the PTE from the struct page. */ - pte = pte_set_home(pte, page_home(pfn_to_page(pfn))); - } else if (hv_pte_get_mode(pte) == 0) { - /* remap_pfn_range(), etc, must supply PTE mode. */ - panic("set_pte(): out-of-range PFN and mode 0\n"); - } - } - - __set_pte(ptep, pte); -} - -/* Can this mm load a PTE with cached_priority set? */ -static inline int mm_is_priority_cached(struct mm_struct *mm) -{ - return mm->context.priority_cached != 0; -} - -/* - * Add a priority mapping to an mm_context and - * notify the hypervisor if this is the first one. - */ -void start_mm_caching(struct mm_struct *mm) -{ - if (!mm_is_priority_cached(mm)) { - mm->context.priority_cached = -1UL; - hv_set_caching(-1UL); - } -} - -/* - * Validate and return the priority_cached flag. We know if it's zero - * that we don't need to scan, since we immediately set it non-zero - * when we first consider a MAP_CACHE_PRIORITY mapping. - * - * We only _try_ to acquire the mmap_sem semaphore; if we can't acquire it, - * since we're in an interrupt context (servicing switch_mm) we don't - * worry about it and don't unset the "priority_cached" field. 
- * Presumably we'll come back later and have more luck and clear - * the value then; for now we'll just keep the cache marked for priority. - */ -static unsigned long update_priority_cached(struct mm_struct *mm) -{ - if (mm->context.priority_cached && down_write_trylock(&mm->mmap_sem)) { - struct vm_area_struct *vm; - for (vm = mm->mmap; vm; vm = vm->vm_next) { - if (hv_pte_get_cached_priority(vm->vm_page_prot)) - break; - } - if (vm == NULL) - mm->context.priority_cached = 0; - up_write(&mm->mmap_sem); - } - return mm->context.priority_cached; -} - -/* Set caching correctly for an mm that we are switching to. */ -void check_mm_caching(struct mm_struct *prev, struct mm_struct *next) -{ - if (!mm_is_priority_cached(next)) { - /* - * If the new mm doesn't use priority caching, just see if we - * need the hv_set_caching(), or can assume it's already zero. - */ - if (mm_is_priority_cached(prev)) - hv_set_caching(0); - } else { - hv_set_caching(update_priority_cached(next)); - } -} - -#if CHIP_HAS_MMIO() - -/* Map an arbitrary MMIO address, homed according to pgprot, into VA space. */ -void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, - pgprot_t home) -{ - void *addr; - struct vm_struct *area; - unsigned long offset, last_addr; - pgprot_t pgprot; - - /* Don't allow wraparound or zero size */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - - /* Create a read/write, MMIO VA mapping homed at the requested shim. */ - pgprot = PAGE_KERNEL; - pgprot = hv_pte_set_mode(pgprot, HV_PTE_MODE_MMIO); - pgprot = hv_pte_set_lotar(pgprot, hv_pte_get_lotar(home)); - - /* - * Mappings have to be page-aligned - */ - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - size = PAGE_ALIGN(last_addr+1) - phys_addr; - - /* - * Ok, go for it.. - */ - area = get_vm_area(size, VM_IOREMAP /* | other flags? 
*/); - if (!area) - return NULL; - area->phys_addr = phys_addr; - addr = area->addr; - if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, - phys_addr, pgprot)) { - free_vm_area(area); - return NULL; - } - return (__force void __iomem *) (offset + (char *)addr); -} -EXPORT_SYMBOL(ioremap_prot); - -#if !defined(CONFIG_PCI) || !defined(CONFIG_TILEGX) -/* ioremap is conditionally declared in pci_gx.c */ - -void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) -{ - return NULL; -} -EXPORT_SYMBOL(ioremap); - -#endif - -/* Unmap an MMIO VA mapping. */ -void iounmap(volatile void __iomem *addr_in) -{ - volatile void __iomem *addr = (volatile void __iomem *) - (PAGE_MASK & (unsigned long __force)addr_in); -#if 1 - vunmap((void * __force)addr); -#else - /* x86 uses this complicated flow instead of vunmap(). Is - * there any particular reason we should do the same? */ - struct vm_struct *p, *o; - - /* Use the vm area unlocked, assuming the caller - ensures there isn't another iounmap for the same address - in parallel. Reuse of the virtual address is prevented by - leaving it in the global lists until we're done with it. - cpa takes care of the direct mappings. */ - p = find_vm_area((void *)addr); - - if (!p) { - pr_err("iounmap: bad address %p\n", addr); - dump_stack(); - return; - } - - /* Finally remove it */ - o = remove_vm_area((void *)addr); - BUG_ON(p != o || o == NULL); - kfree(p); -#endif -} -EXPORT_SYMBOL(iounmap); - -#endif /* CHIP_HAS_MMIO() */ |