diff options
Diffstat (limited to 'arch/arc/mm')
-rw-r--r-- | arch/arc/mm/cache_arc700.c | 163 | ||||
-rw-r--r-- | arch/arc/mm/fault.c | 19 | ||||
-rw-r--r-- | arch/arc/mm/init.c | 8 | ||||
-rw-r--r-- | arch/arc/mm/tlb.c | 255 | ||||
-rw-r--r-- | arch/arc/mm/tlbex.S | 209 |
5 files changed, 347 insertions, 307 deletions
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index f415d851b765..6b58c1de7577 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -182,7 +182,7 @@ void arc_cache_init(void) #ifdef CONFIG_ARC_HAS_ICACHE /* 1. Confirm some of I-cache params which Linux assumes */ - if (ic->line_len != ARC_ICACHE_LINE_LEN) + if (ic->line_len != L1_CACHE_BYTES) panic("Cache H/W doesn't match kernel Config"); if (ic->ver != CONFIG_ARC_MMU_VER) @@ -205,7 +205,7 @@ chk_dc: return; #ifdef CONFIG_ARC_HAS_DCACHE - if (dc->line_len != ARC_DCACHE_LINE_LEN) + if (dc->line_len != L1_CACHE_BYTES) panic("Cache H/W doesn't match kernel Config"); /* check for D-Cache aliasing */ @@ -240,6 +240,67 @@ chk_dc: #define OP_INV 0x1 #define OP_FLUSH 0x2 #define OP_FLUSH_N_INV 0x3 +#define OP_INV_IC 0x4 + +/* + * Common Helper for Line Operations on {I,D}-Cache + */ +static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) +{ + unsigned int aux_cmd, aux_tag; + int num_lines; + const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; + + if (cacheop == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + aux_tag = ARC_REG_IC_PTAG; + } + else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + aux_tag = ARC_REG_DC_PTAG; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). + */ + if (!full_page_op) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + vaddr &= CACHE_LINE_MASK; + } + + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + +#if (CONFIG_ARC_MMU_VER <= 2) + /* MMUv2 and before: paddr contains stuffed vaddrs bits */ + paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; +#else + /* if V-P const for loop, PTAG can be written once outside loop */ + if (full_page_op) + write_aux_reg(ARC_REG_DC_PTAG, paddr); +#endif + + while (num_lines-- > 0) { +#if (CONFIG_ARC_MMU_VER > 2) + /* MMUv3, cache ops require paddr seperately */ + if (!full_page_op) { + write_aux_reg(aux_tag, paddr); + paddr += L1_CACHE_BYTES; + } + + write_aux_reg(aux_cmd, vaddr); + vaddr += L1_CACHE_BYTES; +#else + write_aux_reg(aux, paddr); + paddr += L1_CACHE_BYTES; +#endif + } +} #ifdef CONFIG_ARC_HAS_DCACHE @@ -289,53 +350,6 @@ static inline void __dc_entire_op(const int cacheop) write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH); } -/* - * Per Line Operation on D-Cache - * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete - * It's sole purpose is to help gcc generate ZOL - * (aliasing VIPT dcache flushing needs both vaddr and paddr) - */ -static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int aux_reg) -{ - int num_lines; - - /* Ensure we properly floor/ceil the non-line aligned/sized requests - * and have @paddr - aligned to cache line and integral @num_lines. - * This however can be avoided for page sized since: - * -@paddr will be cache-line aligned already (being page aligned) - * -@sz will be integral multiple of line size (being page sized). - */ - if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) { - sz += paddr & ~DCACHE_LINE_MASK; - paddr &= DCACHE_LINE_MASK; - vaddr &= DCACHE_LINE_MASK; - } - - num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN); - -#if (CONFIG_ARC_MMU_VER <= 2) - paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; -#endif - - while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - /* - * Just as for I$, in MMU v3, D$ ops also require - * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops - */ - write_aux_reg(ARC_REG_DC_PTAG, paddr); - - write_aux_reg(aux_reg, vaddr); - vaddr += ARC_DCACHE_LINE_LEN; -#else - /* paddr contains stuffed vaddrs bits */ - write_aux_reg(aux_reg, paddr); -#endif - paddr += ARC_DCACHE_LINE_LEN; - } -} - /* For kernel mappings cache operation: index is same as paddr */ #define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) @@ -346,7 +360,6 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, unsigned long sz, const int cacheop) { unsigned long flags, tmp = tmp; - int aux; local_irq_save(flags); @@ -361,12 +374,7 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH); } - if (cacheop & OP_INV) /* Inv / flush-n-inv use same cmd reg */ - aux = ARC_REG_DC_IVDL; - else - aux = ARC_REG_DC_FLDL; - - __dc_line_loop(paddr, vaddr, sz, aux); + __cache_line_loop(paddr, vaddr, sz, cacheop); if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */ wait_for_flush(); @@ -438,42 +446,9 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, unsigned long sz) { unsigned long flags; - int num_lines; - - /* - * Ensure we properly floor/ceil the non-line aligned/sized requests: - * However page sized flushes can be compile time optimised. - * -@paddr will be cache-line aligned already (being page aligned) - * -@sz will be integral multiple of line size (being page sized). - */ - if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) { - sz += paddr & ~ICACHE_LINE_MASK; - paddr &= ICACHE_LINE_MASK; - vaddr &= ICACHE_LINE_MASK; - } - - num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN); - -#if (CONFIG_ARC_MMU_VER <= 2) - /* bits 17:13 of vaddr go as bits 4:0 of paddr */ - paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; -#endif local_irq_save(flags); - while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - /* tag comes from phy addr */ - write_aux_reg(ARC_REG_IC_PTAG, paddr); - - /* index bits come from vaddr */ - write_aux_reg(ARC_REG_IC_IVIL, vaddr); - vaddr += ARC_ICACHE_LINE_LEN; -#else - /* paddr contains stuffed vaddrs bits */ - write_aux_reg(ARC_REG_IC_IVIL, paddr); -#endif - paddr += ARC_ICACHE_LINE_LEN; - } + __cache_line_loop(paddr, vaddr, sz, OP_INV_IC); local_irq_restore(flags); } @@ -622,12 +597,12 @@ void flush_icache_range(unsigned long kstart, unsigned long kend) /* * General purpose helper to make I and D cache lines consistent. * @paddr is phy addr of region - * @vaddr is typically user or kernel vaddr (vmalloc) - * Howver in one instance, flush_icache_range() by kprobe (for a breakpt in + * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc) + * However in one instance, when called by kprobe (for a breakpt in * builtin kernel code) @vaddr will be paddr only, meaning CDU operation will * use a paddr to index the cache (despite VIPT). This is fine since since a - * built-in kernel page will not have any virtual mappings (not even kernel) - * kprobe on loadable module is different as it will have kvaddr. + * builtin kernel page will not have any virtual mappings. + * kprobe on loadable module will be kernel vaddr. */ void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len) { diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 0fd1f0d515ff..9c69552350c4 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -17,7 +17,7 @@ #include <asm/pgalloc.h> #include <asm/mmu.h> -static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address) +static int handle_vmalloc_fault(unsigned long address) { /* * Synchronize this task's top level page-table @@ -27,7 +27,7 @@ static int handle_vmalloc_fault(struct mm_struct *mm, unsigned long address) pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; - pgd = pgd_offset_fast(mm, address); + pgd = pgd_offset_fast(current->active_mm, address); pgd_k = pgd_offset_k(address); if (!pgd_present(*pgd_k)) @@ -52,7 +52,7 @@ bad_area: return 1; } -void do_page_fault(struct pt_regs *regs, unsigned long address) +void do_page_fault(unsigned long address, struct pt_regs *regs) { struct vm_area_struct *vma = NULL; struct task_struct *tsk = current; @@ -60,8 +60,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address) siginfo_t info; int fault, ret; int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE | - (write ? FAULT_FLAG_WRITE : 0); + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* * We fault-in kernel-space virtual memory on-demand. The @@ -73,7 +72,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address) * nothing more. */ if (address >= VMALLOC_START && address <= VMALLOC_END) { - ret = handle_vmalloc_fault(mm, address); + ret = handle_vmalloc_fault(address); if (unlikely(ret)) goto bad_area_nosemaphore; else @@ -89,6 +88,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address) if (in_atomic() || !mm) goto no_context; + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; retry: down_read(&mm->mmap_sem); vma = find_vma(mm, address); @@ -117,12 +118,12 @@ good_area: if (write) { if (!(vma->vm_flags & VM_WRITE)) goto bad_area; + flags |= FAULT_FLAG_WRITE; } else { if (!(vma->vm_flags & (VM_READ | VM_EXEC))) goto bad_area; } -survive: /* * If for any reason at all we couldn't handle the fault, * make sure we exit gracefully rather than endlessly redo @@ -201,10 +202,6 @@ no_context: die("Oops", regs, address); out_of_memory: - if (is_global_init(tsk)) { - yield(); - goto survive; - } up_read(&mm->mmap_sem); if (user_mode(regs)) { diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index a08ce7185423..55e0a85bea78 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -125,11 +125,3 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) free_reserved_area((void *)start, (void *)end, -1, "initrd"); } #endif - -#ifdef CONFIG_OF_FLATTREE -void __init early_init_dt_setup_initrd_arch(unsigned long start, - unsigned long end) -{ - pr_err("%s(%lx, %lx)\n", __func__, start, end); -} -#endif /* CONFIG_OF_FLATTREE */ diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 7957dc4e4d4a..e1acf0ce5647 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -52,6 +52,7 @@ */ #include <linux/module.h> +#include <linux/bug.h> #include <asm/arcregs.h> #include <asm/setup.h> #include <asm/mmu_context.h> @@ -99,48 +100,45 @@ /* A copy of the ASID from the PID reg is kept in asid_cache */ -int asid_cache = FIRST_ASID; - -/* ASID to mm struct mapping. We have one extra entry corresponding to - * NO_ASID to save us a compare when clearing the mm entry for old asid - * see get_new_mmu_context (asm-arc/mmu_context.h) - */ -struct mm_struct *asid_mm_map[NUM_ASID + 1]; +DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; /* * Utility Routine to erase a J-TLB entry - * The procedure is to look it up in the MMU. If found, ERASE it by - * issuing a TlbWrite CMD with PD0 = PD1 = 0 + * Caller needs to setup Index Reg (manually or via getIndex) */ - -static void __tlb_entry_erase(void) +static inline void __tlb_entry_erase(void) { write_aux_reg(ARC_REG_TLBPD1, 0); write_aux_reg(ARC_REG_TLBPD0, 0); write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } -static void tlb_entry_erase(unsigned int vaddr_n_asid) +static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) { unsigned int idx; - /* Locate the TLB entry for this vaddr + ASID */ write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe); idx = read_aux_reg(ARC_REG_TLBINDEX); + return idx; +} + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + unsigned int idx; + + /* Locate the TLB entry for this vaddr + ASID */ + idx = tlb_entry_lkup(vaddr_n_asid); + /* No error means entry found, zero it out */ if (likely(!(idx & TLB_LKUP_ERR))) { __tlb_entry_erase(); - } else { /* Some sort of Error */ - + } else { /* Duplicate entry error */ - if (idx & 0x1) { - /* TODO we need to handle this case too */ - pr_emerg("unhandled Duplicate flush for %x\n", - vaddr_n_asid); - } - /* else entry not found so nothing to do */ + WARN(idx == TLB_DUP_ERR, "Probe returned Dup PD for %x\n", + vaddr_n_asid); } } @@ -159,7 +157,7 @@ static void utlb_invalidate(void) { #if (CONFIG_ARC_MMU_VER >= 2) -#if (CONFIG_ARC_MMU_VER < 3) +#if (CONFIG_ARC_MMU_VER == 2) /* MMU v2 introduced the uTLB Flush command. * There was however an obscure hardware bug, where uTLB flush would * fail when a prior probe for J-TLB (both totally unrelated) would @@ -182,6 +180,36 @@ static void utlb_invalidate(void) } +static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +{ + unsigned int idx; + + /* + * First verify if entry for this vaddr+ASID already exists + * This also sets up PD0 (vaddr, ASID..) for final commit + */ + idx = tlb_entry_lkup(pd0); + + /* + * If Not already present get a free slot from MMU. + * Otherwise, Probe would have located the entry and set INDEX Reg + * with existing location. This will cause Write CMD to over-write + * existing entry with new PD0 and PD1 + */ + if (likely(idx & TLB_LKUP_ERR)) + write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex); + + /* setup the other half of TLB entry (pfn, rwx..) */ + write_aux_reg(ARC_REG_TLBPD1, pd1); + + /* + * Commit the Entry to MMU + * It doesnt sound safe to use the TLBWriteNI cmd here + * which doesn't flush uTLBs. I'd rather be safe than sorry. + */ + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); +} + /* * Un-conditionally (without lookup) erase the entire MMU contents */ @@ -224,13 +252,14 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm) return; /* - * Workaround for Android weirdism: - * A binder VMA could end up in a task such that vma->mm != tsk->mm - * old code would cause h/w - s/w ASID to get out of sync + * - Move to a new ASID, but only if the mm is still wired in + * (Android Binder ended up calling this for vma->mm != tsk->mm, + * causing h/w - s/w ASID to get out of sync) + * - Also get_new_mmu_context() new implementation allocates a new + * ASID only if it is not allocated already - so unallocate first */ - if (current->mm != mm) - destroy_context(mm); - else + destroy_context(mm); + if (current->mm == mm) get_new_mmu_context(mm); } @@ -245,8 +274,8 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm) void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + const unsigned int cpu = smp_processor_id(); unsigned long flags; - unsigned int asid; /* If range @start to @end is more than 32 TLB entries deep, * its better to move to a new ASID rather than searching for @@ -268,11 +297,10 @@ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, start &= PAGE_MASK; local_irq_save(flags); - asid = vma->vm_mm->context.asid; - if (asid != NO_ASID) { + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { while (start < end) { - tlb_entry_erase(start | (asid & 0xff)); + tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu)); start += PAGE_SIZE; } } @@ -319,6 +347,7 @@ void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { + const unsigned int cpu = smp_processor_id(); unsigned long flags; /* Note that it is critical that interrupts are DISABLED between @@ -326,23 +355,95 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) */ local_irq_save(flags); - if (vma->vm_mm->context.asid != NO_ASID) { - tlb_entry_erase((page & PAGE_MASK) | - (vma->vm_mm->context.asid & 0xff)); + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { + tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu)); utlb_invalidate(); } local_irq_restore(flags); } +#ifdef CONFIG_SMP + +struct tlb_args { + struct vm_area_struct *ta_vma; + unsigned long ta_start; + unsigned long ta_end; +}; + +static inline void ipi_flush_tlb_page(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_page(ta->ta_vma, ta->ta_start); +} + +static inline void ipi_flush_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} + +static inline void ipi_flush_tlb_kernel_range(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); +} + +void flush_tlb_all(void) +{ + on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm, + mm, 1); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = uaddr + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + struct tlb_args ta = { + .ta_start = start, + .ta_end = end + }; + + on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); +} +#endif + /* * Routine to create a TLB entry */ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { unsigned long flags; - unsigned int idx, asid_or_sasid; - unsigned long pd0_flags; + unsigned int asid_or_sasid, rwx; + unsigned long pd0, pd1; /* * create_tlb() assumes that current->mm == vma->mm, since @@ -374,47 +475,37 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) local_irq_save(flags); - tlb_paranoid_check(vma->vm_mm->context.asid, address); + tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address); address &= PAGE_MASK; /* update this PTE credentials */ pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED); - /* Create HW TLB entry Flags (in PD0) from PTE Flags */ -#if (CONFIG_ARC_MMU_VER <= 2) - pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0) >> 1); -#else - pd0_flags = ((pte_val(*ptep) & PTE_BITS_IN_PD0)); -#endif + /* Create HW TLB(PD0,PD1) from PTE */ /* ASID for this task */ asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff; - write_aux_reg(ARC_REG_TLBPD0, address | pd0_flags | asid_or_sasid); - - /* Load remaining info in PD1 (Page Frame Addr and Kx/Kw/Kr Flags) */ - write_aux_reg(ARC_REG_TLBPD1, (pte_val(*ptep) & PTE_BITS_IN_PD1)); - - /* First verify if entry for this vaddr+ASID already exists */ - write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe); - idx = read_aux_reg(ARC_REG_TLBINDEX); + pd0 = address | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); /* - * If Not already present get a free slot from MMU. - * Otherwise, Probe would have located the entry and set INDEX Reg - * with existing location. This will cause Write CMD to over-write - * existing entry with new PD0 and PD1 + * ARC MMU provides fully orthogonal access bits for K/U mode, + * however Linux only saves 1 set to save PTE real-estate + * Here we convert 3 PTE bits into 6 MMU bits: + * -Kernel only entries have Kr Kw Kx 0 0 0 + * -User entries have mirrored K and U bits */ - if (likely(idx & TLB_LKUP_ERR)) - write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex); + rwx = pte_val(*ptep) & PTE_BITS_RWX; - /* - * Commit the Entry to MMU - * It doesnt sound safe to use the TLBWriteNI cmd here - * which doesn't flush uTLBs. I'd rather be safe than sorry. - */ - write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + if (pte_val(*ptep) & _PAGE_GLOBAL) + rwx <<= 3; /* r w x => Kr Kw Kx 0 0 0 */ + else + rwx |= (rwx << 3); /* r w x => Kr Kw Kx Ur Uw Ux */ + + pd1 = rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1); + + tlb_entry_insert(pd0, pd1); local_irq_restore(flags); } @@ -553,13 +644,6 @@ void arc_mmu_init(void) if (mmu->pg_sz != PAGE_SIZE) panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); - /* - * ASID mgmt data structures are compile time init - * asid_cache = FIRST_ASID and asid_mm_map[] all zeroes - */ - - local_flush_tlb_all(); - /* Enable the MMU */ write_aux_reg(ARC_REG_PID, MMU_ENABLE); @@ -601,9 +685,9 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, struct pt_regs *regs) { int set, way, n; - unsigned int pd0[4], pd1[4]; /* assume max 4 ways */ unsigned long flags, is_valid; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + unsigned int pd0[mmu->ways], pd1[mmu->ways]; local_irq_save(flags); @@ -628,7 +712,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, continue; /* Scan the set for duplicate ways: needs a nested loop */ - for (way = 0; way < mmu->ways; way++) { + for (way = 0; way < mmu->ways - 1; way++) { if (!pd0[way]) continue; @@ -671,25 +755,28 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS * don't match */ -void print_asid_mismatch(int is_fast_path) +void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path) { - int pid_sw, pid_hw; - pid_sw = current->active_mm->context.asid; - pid_hw = read_aux_reg(ARC_REG_PID) & 0xff; - pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n", - is_fast_path ? "Fast" : "Slow", pid_sw, pid_hw); + is_fast_path ? "Fast" : "Slow", mm_asid, mmu_asid); __asm__ __volatile__("flag 1"); } -void tlb_paranoid_check(unsigned int pid_sw, unsigned long addr) +void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr) { - unsigned int pid_hw; + unsigned int mmu_asid; - pid_hw = read_aux_reg(ARC_REG_PID) & 0xff; + mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff; - if (addr < 0x70000000 && ((pid_hw != pid_sw) || (pid_sw == NO_ASID))) - print_asid_mismatch(0); + /* + * At the time of a TLB miss/installation + * - HW version needs to match SW version + * - SW needs to have a valid ASID + */ + if (addr < 0x70000000 && + ((mm_asid == MM_CTXT_NO_ASID) || + (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK)))) + print_asid_mismatch(mm_asid, mmu_asid, 0); } #endif diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index 5c5bb23001b0..3fcfdb38d242 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -44,17 +44,36 @@ #include <asm/arcregs.h> #include <asm/cache.h> #include <asm/processor.h> -#if (CONFIG_ARC_MMU_VER == 1) #include <asm/tlb-mmu1.h> -#endif -;-------------------------------------------------------------------------- -; scratch memory to save the registers (r0-r3) used to code TLB refill Handler -; For details refer to comments before TLBMISS_FREEUP_REGS below +;----------------------------------------------------------------- +; ARC700 Exception Handling doesn't auto-switch stack and it only provides +; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" +; +; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a +; "global" is used to free-up FIRST core reg to be able to code the rest of +; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe). +; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 +; need to be saved as well by extending the "global" to be 4 words. Hence +; ".size ex_saved_reg1, 16" +; [All of this dance is to avoid stack switching for each TLB Miss, since we +; only need to save only a handful of regs, as opposed to complete reg file] +; +; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST +; core reg as it will not be SMP safe. +; Thus scratch AUX reg is used (and no longer used to cache task PGD). +; To save the rest of 3 regs - per cpu, the global is made "per-cpu". +; Epilogue thus has to locate the "per-cpu" storage for regs. +; To avoid cache line bouncing the per-cpu global is aligned/sized per +; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence +; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" + +; As simple as that.... ;-------------------------------------------------------------------------- +; scratch memory to save [r0-r3] used to code TLB refill Handler ARCFP_DATA ex_saved_reg1 - .align 1 << L1_CACHE_SHIFT ; IMP: Must be Cache Line aligned + .align 1 << L1_CACHE_SHIFT .type ex_saved_reg1, @object #ifdef CONFIG_SMP .size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT) @@ -66,6 +85,44 @@ ex_saved_reg1: .zero 16 #endif +.macro TLBMISS_FREEUP_REGS +#ifdef CONFIG_SMP + sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with + GET_CPU_ID r0 ; get to per cpu scratch mem, + lsl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu + add r0, @ex_saved_reg1, r0 +#else + st r0, [@ex_saved_reg1] + mov_s r0, @ex_saved_reg1 +#endif + st_s r1, [r0, 4] + st_s r2, [r0, 8] + st_s r3, [r0, 12] + + ; VERIFY if the ASID in MMU-PID Reg is same as + ; one in Linux data structures + + tlb_paranoid_check_asm +.endm + +.macro TLBMISS_RESTORE_REGS +#ifdef CONFIG_SMP + GET_CPU_ID r0 ; get to per cpu scratch mem + lsl r0, r0, L1_CACHE_SHIFT ; each is cache line wide + add r0, @ex_saved_reg1, r0 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + lr r0, [ARC_REG_SCRATCH_DATA0] +#else + mov_s r0, @ex_saved_reg1 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + ld_s r0, [r0] +#endif +.endm + ;============================================================================ ; Troubleshooting Stuff ;============================================================================ @@ -76,34 +133,35 @@ ex_saved_reg1: ; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble. ; So we try to detect this in TLB Mis shandler - -.macro DBG_ASID_MISMATCH +.macro tlb_paranoid_check_asm #ifdef CONFIG_ARC_DBG_TLB_PARANOIA - ; make sure h/w ASID is same as s/w ASID - GET_CURR_TASK_ON_CPU r3 ld r0, [r3, TASK_ACT_MM] ld r0, [r0, MM_CTXT+MM_CTXT_ASID] + breq r0, 0, 55f ; Error if no ASID allocated lr r1, [ARC_REG_PID] and r1, r1, 0xFF - breq r1, r0, 5f + and r2, r0, 0xFF ; MMU PID bits only for comparison + breq r1, r2, 5f + +55: ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode - lr r0, [erstatus] - bbit0 r0, STATUS_U_BIT, 5f + lr r2, [erstatus] + bbit0 r2, STATUS_U_BIT, 5f ; We sure are in troubled waters, Flag the error, but to do so ; need to switch to kernel mode stack to call error routine GET_TSK_STACK_BASE r3, sp ; Call printk to shoutout aloud - mov r0, 1 + mov r2, 1 j print_asid_mismatch -5: ; ASIDs match so proceed normally +5: ; ASIDs match so proceed normally nop #endif @@ -161,13 +219,17 @@ ex_saved_reg1: ; IN: r0 = PTE, r1 = ptr to PTE .macro CONV_PTE_TO_TLB - and r3, r0, PTE_BITS_IN_PD1 ; Extract permission flags+PFN from PTE - sr r3, [ARC_REG_TLBPD1] ; these go in PD1 + and r3, r0, PTE_BITS_RWX ; r w x + lsl r2, r3, 3 ; r w x 0 0 0 + and.f 0, r0, _PAGE_GLOBAL + or.z r2, r2, r3 ; r w x r w x + + and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE + or r3, r3, r2 + + sr r3, [ARC_REG_TLBPD1] ; these go in PD1 and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb -#if (CONFIG_ARC_MMU_VER <= 2) /* Neednot be done with v3 onwards */ - lsr r2, r2 ; shift PTE flags to match layout in PD0 -#endif lr r3,[ARC_REG_TLBPD0] ; MMU prepares PD0 with vaddr and asid @@ -191,68 +253,6 @@ ex_saved_reg1: #endif .endm -;----------------------------------------------------------------- -; ARC700 Exception Handling doesn't auto-switch stack and it only provides -; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" -; -; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a -; "global" is used to free-up FIRST core reg to be able to code the rest of -; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe). -; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 -; need to be saved as well by extending the "global" to be 4 words. Hence -; ".size ex_saved_reg1, 16" -; [All of this dance is to avoid stack switching for each TLB Miss, since we -; only need to save only a handful of regs, as opposed to complete reg file] -; -; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST -; core reg as it will not be SMP safe. -; Thus scratch AUX reg is used (and no longer used to cache task PGD). -; To save the rest of 3 regs - per cpu, the global is made "per-cpu". -; Epilogue thus has to locate the "per-cpu" storage for regs. -; To avoid cache line bouncing the per-cpu global is aligned/sized per -; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence -; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" - -; As simple as that.... - -.macro TLBMISS_FREEUP_REGS -#ifdef CONFIG_SMP - sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with - GET_CPU_ID r0 ; get to per cpu scratch mem, - lsl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu - add r0, @ex_saved_reg1, r0 -#else - st r0, [@ex_saved_reg1] - mov_s r0, @ex_saved_reg1 -#endif - st_s r1, [r0, 4] - st_s r2, [r0, 8] - st_s r3, [r0, 12] - - ; VERIFY if the ASID in MMU-PID Reg is same as - ; one in Linux data structures - - DBG_ASID_MISMATCH -.endm - -;----------------------------------------------------------------- -.macro TLBMISS_RESTORE_REGS -#ifdef CONFIG_SMP - GET_CPU_ID r0 ; get to per cpu scratch mem - lsl r0, r0, L1_CACHE_SHIFT ; each is cache line wide - add r0, @ex_saved_reg1, r0 - ld_s r3, [r0,12] - ld_s r2, [r0, 8] - ld_s r1, [r0, 4] - lr r0, [ARC_REG_SCRATCH_DATA0] -#else - mov_s r0, @ex_saved_reg1 - ld_s r3, [r0,12] - ld_s r2, [r0, 8] - ld_s r1, [r0, 4] - ld_s r0, [r0] -#endif -.endm ARCFP_CODE ;Fast Path Code, candidate for ICCM @@ -277,8 +277,8 @@ ARC_ENTRY EV_TLBMissI ;---------------------------------------------------------------- ; VERIFY_PTE: Check if PTE permissions approp for executing code cmp_s r2, VMALLOC_START - mov.lo r2, (_PAGE_PRESENT | _PAGE_U_EXECUTE) - mov.hs r2, (_PAGE_PRESENT | _PAGE_K_EXECUTE) + mov_s r2, (_PAGE_PRESENT | _PAGE_EXECUTE) + or.hs r2, r2, _PAGE_GLOBAL and r3, r0, r2 ; Mask out NON Flag bits from PTE xor.f r3, r3, r2 ; check ( ( pte & flags_test ) == flags_test ) @@ -317,26 +317,21 @@ ARC_ENTRY EV_TLBMissD ;---------------------------------------------------------------- ; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W) - mov_s r2, 0 + cmp_s r2, VMALLOC_START + mov_s r2, _PAGE_PRESENT ; common bit for K/U PTE + or.hs r2, r2, _PAGE_GLOBAL ; kernel PTE only + + ; Linux PTE [RWX] bits are semantically overloaded: + ; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc) + ; -Otherwise they are user-mode permissions, and those are exactly + ; same for kernel mode as well (e.g. copy_(to|from)_user) + lr r3, [ecr] btst_s r3, ECR_C_BIT_DTLB_LD_MISS ; Read Access - or.nz r2, r2, _PAGE_U_READ ; chk for Read flag in PTE + or.nz r2, r2, _PAGE_READ ; chk for Read flag in PTE btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; Write Access - or.nz r2, r2, _PAGE_U_WRITE ; chk for Write flag in PTE - ; Above laddering takes care of XCHG access - ; which is both Read and Write - - ; If kernel mode access, ; make _PAGE_xx flags as _PAGE_K_xx - ; For copy_(to|from)_user, despite exception taken in kernel mode, - ; this code is not hit, because EFA would still be the user mode - ; address (EFA < 0x6000_0000). - ; This code is for legit kernel mode faults, vmalloc specifically - ; (EFA: 0x7000_0000 to 0x7FFF_FFFF) - - lr r3, [efa] - cmp r3, VMALLOC_START - 1 ; If kernel mode access - asl.hi r2, r2, 3 ; make _PAGE_xx flags as _PAGE_K_xx - or r2, r2, _PAGE_PRESENT ; Common flag for K/U mode + or.nz r2, r2, _PAGE_WRITE ; chk for Write flag in PTE + ; Above laddering takes care of XCHG access (both R and W) ; By now, r2 setup with all the Flags we need to check in PTE and r3, r0, r2 ; Mask out NON Flag bits from PTE @@ -371,17 +366,11 @@ do_slow_path_pf: ; Slow path TLB Miss handled as a regular ARC Exception ; (stack switching / save the complete reg-file). - ; That requires freeing up r9 - EXCPN_PROLOG_FREEUP_REG r9 - - lr r9, [erstatus] - - SWITCH_TO_KERNEL_STK - SAVE_ALL_SYS + EXCEPTION_PROLOGUE ; ------- setup args for Linux Page fault Hanlder --------- - mov_s r0, sp - lr r1, [efa] + mov_s r1, sp + lr r0, [efa] ; We don't want exceptions to be disabled while the fault is handled. ; Now that we have saved the context we return from exception hence |