path: root/arch/powerpc/mm/fault.c
2020-05-19  powerpc: Use a datatype for instructions  (Jordan Niethe; 1 file, -2/+2)
Currently unsigned ints are used to represent instructions on powerpc. This has worked well as instructions have always been 4 byte words. However, ISA v3.1 introduces some changes to instructions that mean this scheme will no longer work as well. This change is Prefixed Instructions. A prefixed instruction is made up of a word prefix followed by a word suffix to make an 8 byte double word instruction. No matter the endianness of the system the prefix always comes first. Prefixed instructions are only planned for powerpc64. Introduce a ppc_inst type to represent both prefixed and word instructions on powerpc64 while keeping it possible to exclusively have word instructions on powerpc32. Signed-off-by: Jordan Niethe <[email protected]> [mpe: Fix compile error in emulate_spe()] Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected]
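For illustration, a minimal sketch of such a wrapper type; treat the exact layout as an assumption (the kernel's definition lives in arch/powerpc/include/asm/inst.h):

  struct ppc_inst {
      u32 val;
  #ifdef CONFIG_PPC64
      u32 suffix;    /* only meaningful for prefixed instructions */
  #endif
  } __packed;

  /* callers read the raw word through an accessor, never the struct directly */
  static inline u32 ppc_inst_val(struct ppc_inst x)
  {
      return x.val;
  }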
2020-05-19  powerpc: Use a function for getting the instruction op code  (Jordan Niethe; 1 file, -1/+2)
In preparation for using a data type for instructions that cannot be directly used with the '>>' operator, use a function for getting the op code of an instruction. Signed-off-by: Jordan Niethe <[email protected]> Reviewed-by: Alistair Popple <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected]
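A sketch of what such an accessor looks like, assuming plain 32-bit word instructions (the primary opcode occupies the top 6 bits of the word):

  static inline int ppc_inst_primary_opcode(u32 inst)
  {
      return inst >> 26;    /* primary opcode = bits 0-5 in big-endian numbering */
  }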
2020-05-19  powerpc: Use an accessor for instructions  (Jordan Niethe; 1 file, -3/+3)
In preparation for introducing a more complicated instruction type to accommodate prefixed instructions, use an accessor for getting an instruction as a u32. Signed-off-by: Jordan Niethe <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected]
2020-05-05  powerpc/pkeys: Check vma before returning key fault error to the user  (Aneesh Kumar K.V; 1 file, -8/+0)
If multiple threads in userspace keep changing the protection keys mapping a range, there can be a scenario where the kernel takes a key fault but the pkey value found in the siginfo struct is a permissive one. This can confuse userspace, as shown in the test case below.

  /* use this to control the number of test iterations */

  static void pkeyreg_set(int pkey, unsigned long rights)
  {
      unsigned long reg, shift;

      shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
      asm volatile("mfspr %0, 0xd" : "=r"(reg));
      reg &= ~(((unsigned long) PKEY_BITS_MASK) << shift);
      reg |= (rights & PKEY_BITS_MASK) << shift;
      asm volatile("mtspr 0xd, %0" : : "r"(reg));
  }

  static unsigned long pkeyreg_get(void)
  {
      unsigned long reg;

      asm volatile("mfspr %0, 0xd" : "=r"(reg));
      return reg;
  }

  static int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey)
  {
      return syscall(SYS_pkey_mprotect, addr, len, prot, pkey);
  }

  static int sys_pkey_alloc(unsigned long flags, unsigned long access_rights)
  {
      return syscall(SYS_pkey_alloc, flags, access_rights);
  }

  static int sys_pkey_free(int pkey)
  {
      return syscall(SYS_pkey_free, pkey);
  }

  static int faulting_pkey;
  static int permissive_pkey;
  static pthread_barrier_t pkey_set_barrier;
  static pthread_barrier_t mprotect_barrier;

  static void pkey_handle_fault(int signum, siginfo_t *sinfo, void *ctx)
  {
      unsigned long pkeyreg;

      /* FIXME: printf is not signal-safe but for the current purpose,
         it gets the job done. */
      printf("pkey: exp = %d, got = %d\n", faulting_pkey, sinfo->si_pkey);
      fflush(stdout);

      assert(sinfo->si_code == SEGV_PKUERR);
      assert(sinfo->si_pkey == faulting_pkey);

      /* clear pkey permissions to let the faulting instruction continue */
      pkeyreg_set(faulting_pkey, 0x0);
  }

  static void *do_mprotect_fault(void *p)
  {
      unsigned long rights, pkeyreg, pgsize;
      unsigned int i;
      void *region;
      int pkey;

      srand(time(NULL));
      pgsize = sysconf(_SC_PAGESIZE);
      rights = PKEY_DISABLE_WRITE;
      region = p;

      /* allocate key, no permissions */
      assert((pkey = sys_pkey_alloc(0, PKEY_DISABLE_ACCESS)) > 0);
      pkeyreg_set(4, 0x0);

      /* cache the pkey here as the faulting pkey for future reference
         in the signal handler */
      faulting_pkey = pkey;
      printf("%s: faulting pkey = %d\n", __func__, faulting_pkey);

      /* try to allocate, mprotect and free pkeys repeatedly */
      for (i = 0; i < NUM_ITERATIONS; i++) {
          /* sync up with the other thread here */
          pthread_barrier_wait(&pkey_set_barrier);

          /* make sure that the pkey used by the non-faulting thread
             is made permissive for this thread's context too so that
             no faults are triggered because it still might have been
             set to a restrictive value */
          // pkeyreg_set(permissive_pkey, 0x0);

          /* sync up with the other thread here */
          pthread_barrier_wait(&mprotect_barrier);

          /* perform mprotect */
          assert(!sys_pkey_mprotect(region, pgsize,
                                    PROT_READ | PROT_WRITE, pkey));

          /* choose a random byte from the protected region and attempt
             to write to it, this will generate a fault */
          *((char *) region + (rand() % pgsize)) = rand();

          /* restore pkey permissions as the signal handler may have
             cleared the bit out for the sake of continuing */
          pkeyreg_set(pkey, PKEY_DISABLE_WRITE);
      }

      /* free pkey */
      sys_pkey_free(pkey);

      return NULL;
  }

  static void *do_mprotect_nofault(void *p)
  {
      unsigned long pgsize;
      unsigned int i, j;
      void *region;
      int pkey;

      pgsize = sysconf(_SC_PAGESIZE);
      region = p;

      /* try to allocate, mprotect and free pkeys repeatedly */
      for (i = 0; i < NUM_ITERATIONS; i++) {
          /* allocate pkey, all permissions */
          assert((pkey = sys_pkey_alloc(0, 0)) > 0);
          permissive_pkey = pkey;

          /* sync up with the other thread here */
          pthread_barrier_wait(&pkey_set_barrier);
          pthread_barrier_wait(&mprotect_barrier);

          /* perform mprotect on the common page, no faults will be
             triggered as this is most permissive */
          assert(!sys_pkey_mprotect(region, pgsize,
                                    PROT_READ | PROT_WRITE, pkey));

          /* free pkey */
          assert(!sys_pkey_free(pkey));
      }

      return NULL;
  }

  int main(int argc, char **argv)
  {
      pthread_t fault_thread, nofault_thread;
      unsigned long pgsize;
      struct sigaction act;
      pthread_attr_t attr;
      cpu_set_t fault_cpuset, nofault_cpuset;
      unsigned int i;
      void *region;

      /* allocate memory region to protect */
      pgsize = sysconf(_SC_PAGESIZE);
      assert(region = memalign(pgsize, pgsize));

      CPU_ZERO(&fault_cpuset);
      CPU_SET(0, &fault_cpuset);
      CPU_ZERO(&nofault_cpuset);
      CPU_SET(8, &nofault_cpuset);
      assert(!pthread_attr_init(&attr));

      /* setup sigsegv signal handler */
      act.sa_handler = 0;
      act.sa_sigaction = pkey_handle_fault;
      assert(!sigprocmask(SIG_SETMASK, 0, &act.sa_mask));
      act.sa_flags = SA_SIGINFO;
      act.sa_restorer = 0;
      assert(!sigaction(SIGSEGV, &act, NULL));

      /* setup barrier for the two threads */
      pthread_barrier_init(&pkey_set_barrier, NULL, 2);
      pthread_barrier_init(&mprotect_barrier, NULL, 2);

      /* setup and start threads */
      assert(!pthread_create(&fault_thread, &attr, &do_mprotect_fault,
                             region));
      assert(!pthread_setaffinity_np(fault_thread, sizeof(cpu_set_t),
                                     &fault_cpuset));
      assert(!pthread_create(&nofault_thread, &attr, &do_mprotect_nofault,
                             region));
      assert(!pthread_setaffinity_np(nofault_thread, sizeof(cpu_set_t),
                                     &nofault_cpuset));

      /* cleanup */
      assert(!pthread_attr_destroy(&attr));
      assert(!pthread_join(fault_thread, NULL));
      assert(!pthread_join(nofault_thread, NULL));
      assert(!pthread_barrier_destroy(&pkey_set_barrier));
      assert(!pthread_barrier_destroy(&mprotect_barrier));
      free(region);

      puts("PASS");

      return EXIT_SUCCESS;
  }

Without this patch, the above test can fail as follows:

  pkey: exp = 3, got = 3
  pkey: exp = 3, got = 4
  a.out: pkey-siginfo-race.c:100: pkey_handle_fault: Assertion `sinfo->si_pkey == faulting_pkey' failed.
  Aborted

Check for vma access before considering this a key fault. If the vma pkey allows access, retry the access. The test case was written by Sandipan Das <[email protected]>, hence the SOB from him. Signed-off-by: Sandipan Das <[email protected]> Signed-off-by: Aneesh Kumar K.V <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected]
2020-05-05  powerpc/pkeys: Avoid using lockless page table walk  (Aneesh Kumar K.V; 1 file, -23/+60)
Fetch pkey from vma instead of linux page table. Also document the fact that in some cases the pkey returned in siginfo won't be the same as the one we took keyfault on. Even with linux page table walk, we can end up in a similar scenario. Signed-off-by: Aneesh Kumar K.V <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected]
2020-04-07  mm/vma: make vma_is_accessible() available for general use  (Anshuman Khandual; 1 file, -1/+1)
Let's move the vma_is_accessible() helper to include/linux/mm.h, which makes it available for general use. While here, this replaces all remaining open encodings for VMA access checks with vma_is_accessible(). Signed-off-by: Anshuman Khandual <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Acked-by: Geert Uytterhoeven <[email protected]> Acked-by: Guo Ren <[email protected]> Acked-by: Vlastimil Babka <[email protected]> Cc: Guo Ren <[email protected]> Cc: Geert Uytterhoeven <[email protected]> Cc: Ralf Baechle <[email protected]> Cc: Paul Burton <[email protected]> Cc: Benjamin Herrenschmidt <[email protected]> Cc: Paul Mackerras <[email protected]> Cc: Michael Ellerman <[email protected]> Cc: Yoshinori Sato <[email protected]> Cc: Rich Felker <[email protected]> Cc: Dave Hansen <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Steven Rostedt <[email protected]> Cc: Mel Gorman <[email protected]> Cc: Alexander Viro <[email protected]> Cc: "Aneesh Kumar K.V" <[email protected]> Cc: Arnaldo Carvalho de Melo <[email protected]> Cc: Arnd Bergmann <[email protected]> Cc: Nick Piggin <[email protected]> Cc: Paul Mackerras <[email protected]> Cc: Will Deacon <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Linus Torvalds <[email protected]>
2020-04-02  mm: allow VM_FAULT_RETRY for multiple times  (Peter Xu; 1 file, -6/+0)
The idea comes from a discussion between Linus and Andrea [1].

Before this patch we only allowed a page fault to retry once. We achieved this by clearing the FAULT_FLAG_ALLOW_RETRY flag when doing handle_mm_fault() the second time. This was mainly used to avoid unexpected starvation of the system by looping over forever to handle the page fault on a single page. However that should hardly happen, and after all, for each code path to return a VM_FAULT_RETRY, we'll first wait for a condition (during which time we should possibly yield the cpu) to happen before VM_FAULT_RETRY is really returned.

This patch removes the restriction by keeping the FAULT_FLAG_ALLOW_RETRY flag when we receive VM_FAULT_RETRY. It means that the page fault handler can now retry the page fault multiple times if necessary, without the need to generate another page fault event. Meanwhile we still keep the FAULT_FLAG_TRIED flag so the page fault handler can still identify whether a page fault is the first attempt or not.

Then we'll have these combinations of fault flags (only considering the ALLOW_RETRY flag and the TRIED flag):

 - ALLOW_RETRY and !TRIED: this means the page fault allows retry, and this is the first try
 - ALLOW_RETRY and TRIED: this means the page fault allows retry, and this is not the first try
 - !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry at all
 - !ALLOW_RETRY and TRIED: this is forbidden and should never be used

In existing code we have multiple places that take special care of the first condition above by checking against (fault_flags & FAULT_FLAG_ALLOW_RETRY). This patch introduces a simple helper to detect the first retry of a page fault by checking against both (fault_flags & FAULT_FLAG_ALLOW_RETRY) and !(fault_flag & FAULT_FLAG_TRIED), because now even the 2nd try will have ALLOW_RETRY set, then uses that helper in all existing special paths. One example is in __lock_page_or_retry(): now we'll drop the mmap_sem only in the first attempt of the page fault and keep it in follow-up retries, so the old locking behavior is retained.

This will be a nice enhancement for current code [2] and at the same time supporting material for the future userfaultfd-writeprotect work, since in that work there will always be an explicit userfault writeprotect retry for protected pages, and if that cannot resolve the page fault (e.g., when userfaultfd-writeprotect is used in conjunction with swapped pages) then we'll possibly need a 3rd retry of the page fault. It might also benefit other potential users who will have similar requirements, like userfault write-protection.

GUP code is not touched yet and will be covered in a follow-up patch. Please read the thread below for more information.

[1] https://lore.kernel.org/lkml/[email protected]/ [2] https://lore.kernel.org/lkml/[email protected]/

Suggested-by: Linus Torvalds <[email protected]> Suggested-by: Andrea Arcangeli <[email protected]> Signed-off-by: Peter Xu <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Tested-by: Brian Geffon <[email protected]> Cc: Bobby Powers <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Denis Plotnikov <[email protected]> Cc: "Dr . David Alan Gilbert" <[email protected]> Cc: Hugh Dickins <[email protected]> Cc: Jerome Glisse <[email protected]> Cc: Johannes Weiner <[email protected]> Cc: "Kirill A. Shutemov" <[email protected]> Cc: Martin Cracauer <[email protected]> Cc: Marty McFadden <[email protected]> Cc: Matthew Wilcox <[email protected]> Cc: Maya Gokhale <[email protected]> Cc: Mel Gorman <[email protected]> Cc: Mike Kravetz <[email protected]> Cc: Mike Rapoport <[email protected]> Cc: Pavel Emelyanov <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Linus Torvalds <[email protected]>
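Per the description above, the first-try helper reads roughly as follows (see include/linux/mm.h for the definitive version):

  static inline bool fault_flag_allow_retry_first(unsigned int flags)
  {
      /* retries are allowed, and this is the very first attempt */
      return (flags & FAULT_FLAG_ALLOW_RETRY) &&
             !(flags & FAULT_FLAG_TRIED);
  }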
2020-04-02  mm: introduce FAULT_FLAG_DEFAULT  (Peter Xu; 1 file, -1/+1)
Although there are tons of arch-specific page fault handlers, most of them share the same initial value of the page fault flags. Nearly all of the page fault handlers allow the fault to be retried, and they also allow the fault to respond to SIGKILL. Let's define a default value for the fault flags to replace those initial page fault flags that were copied over. With this, it'll be far easier to introduce a new fault flag that can be used by all the architectures instead of touching all the archs. Signed-off-by: Peter Xu <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Tested-by: Brian Geffon <[email protected]> Reviewed-by: David Hildenbrand <[email protected]> Cc: Andrea Arcangeli <[email protected]> Cc: Bobby Powers <[email protected]> Cc: Denis Plotnikov <[email protected]> Cc: "Dr . David Alan Gilbert" <[email protected]> Cc: Hugh Dickins <[email protected]> Cc: Jerome Glisse <[email protected]> Cc: Johannes Weiner <[email protected]> Cc: "Kirill A. Shutemov" <[email protected]> Cc: Martin Cracauer <[email protected]> Cc: Marty McFadden <[email protected]> Cc: Matthew Wilcox <[email protected]> Cc: Maya Gokhale <[email protected]> Cc: Mel Gorman <[email protected]> Cc: Mike Kravetz <[email protected]> Cc: Mike Rapoport <[email protected]> Cc: Pavel Emelyanov <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Linus Torvalds <[email protected]>
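Based on the description above, the default amounts to the two flags every handler was already setting; a sketch (the authoritative definition is in include/linux/mm.h):

  #define FAULT_FLAG_DEFAULT    (FAULT_FLAG_ALLOW_RETRY | \
                                 FAULT_FLAG_KILLABLE)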
2020-04-02  powerpc/mm: use helper fault_signal_pending()  (Peter Xu; 1 file, -8/+4)
Let the powerpc code use the new helper by moving the signal handling earlier, before the retry logic. Signed-off-by: Peter Xu <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Tested-by: Brian Geffon <[email protected]> Cc: Andrea Arcangeli <[email protected]> Cc: Bobby Powers <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Denis Plotnikov <[email protected]> Cc: "Dr . David Alan Gilbert" <[email protected]> Cc: Hugh Dickins <[email protected]> Cc: Jerome Glisse <[email protected]> Cc: Johannes Weiner <[email protected]> Cc: "Kirill A. Shutemov" <[email protected]> Cc: Martin Cracauer <[email protected]> Cc: Marty McFadden <[email protected]> Cc: Matthew Wilcox <[email protected]> Cc: Maya Gokhale <[email protected]> Cc: Mel Gorman <[email protected]> Cc: Mike Kravetz <[email protected]> Cc: Mike Rapoport <[email protected]> Cc: Pavel Emelyanov <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Linus Torvalds <[email protected]>
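A sketch of what the helper checks: the fault should be aborted early if it returned VM_FAULT_RETRY and either a fatal signal is pending, or any signal is pending on a user-mode fault:

  static inline bool fault_signal_pending(vm_fault_t fault_flags,
                                          struct pt_regs *regs)
  {
      return unlikely((fault_flags & VM_FAULT_RETRY) &&
                      (fatal_signal_pending(current) ||
                       (user_mode(regs) && signal_pending(current))));
  }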
2020-02-01  Merge branch 'topic/user-access-begin' into next  (Michael Ellerman; 1 file, -1/+1)
Merge the user_access_begin() series from Christophe. This is based on a commit from Linus that went into v5.5-rc7.
2020-01-28  powerpc/32s: Fix bad_kuap_fault()  (Christophe Leroy; 1 file, -1/+1)
At the moment, bad_kuap_fault() reports a fault only if a bad access to userspace occurred while access to userspace was not granted. But if a fault occurs for a write outside the allowed userspace segment(s) that have been unlocked, bad_kuap_fault() fails to detect it and the kernel loops forever in do_page_fault(). Fix it by checking that the accessed address is within the allowed range. Fixes: a68c31fc01ef ("powerpc/32s: Implement Kernel Userspace Access Protection") Cc: [email protected] # v5.2+ Signed-off-by: Christophe Leroy <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/f48244e9485ada0a304ed33ccbb8da271180c80d.1579866752.git.christophe.leroy@c-s.fr
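The shape of the fix, as a sketch only; the segment-register arithmetic below is an assumption about how the 32s KUAP state is encoded, not a verbatim copy of the patch:

  static inline bool bad_kuap_fault(struct pt_regs *regs,
                                    unsigned long address, bool is_write)
  {
      unsigned long begin = regs->kuap & 0xf0000000;    /* assumed encoding */
      unsigned long end   = regs->kuap << 28;           /* assumed encoding */

      if (!is_write)
          return false;

      /* report a bad KUAP fault for writes outside the unlocked range */
      return address < begin || address >= end;
  }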
2020-01-27  powerpc/mm: Don't log user reads to 0xffffffff  (Christophe Leroy; 1 file, -0/+3)
Running vdsotest produces the following log message many times: [ 79.629901] vdsotest[396]: User access of kernel address (ffffffff) - exploit attempt? (uid: 0) A pointer set to (-1) is likely a programming error similar to a NULL pointer and is not worth logging as an exploit attempt. Don't log user accesses to 0xffffffff. Signed-off-by: Christophe Leroy <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/0728849e826ba16f1fbd6fa7f5c6cc87bd64e097.1577087627.git.christophe.leroy@c-s.fr
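The guard is small; a sketch (the surrounding variable names and message format are assumptions reconstructed from the log line above):

  /* a load/store through a (char *)-1 style pointer faults at 0xffffffff;
     treat it like a NULL-pointer bug, not an exploit attempt */
  if (is_user && address >= TASK_SIZE && address != -1UL)
      pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n",
                          current->comm, current->pid, address,
                          from_kuid(&init_user_ns, current_uid()));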
2020-01-26  powerpc: use probe_user_read() and probe_user_write()  (Christophe Leroy; 1 file, -5/+1)
Instead of opencoding, use probe_user_read() to safely read a user location and probe_user_write() for writing to user. Signed-off-by: Christophe Leroy <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/e041f5eedb23f09ab553be8a91c3de2087147320.1579800517.git.christophe.leroy@c-s.fr
2019-11-18  powerpc/mm: Show if a bad page fault on data is read or write.  (Christophe Leroy; 1 file, -2/+4)
DSISR (or ESR on some CPUs) has a bit to tell if the fault is due to a read or a write. Display it. Signed-off-by: Christophe Leroy <[email protected]> Reviewed-by: Santosh Sivaraj <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/4f88d7e6fda53b5f80a71040ab400242f6c8cb93.1566400889.git.christophe.leroy@c-s.fr
2019-07-16  mm, kprobes: generalize and rename notify_page_fault() as kprobe_page_fault()  (Anshuman Khandual; 1 file, -21/+2)
Architectures which support kprobes have very similar boilerplate around calling kprobe_fault_handler(). Use a helper function in kprobes.h to unify them, based on the x86 code. This changes the behaviour for other architectures when preemption is enabled. Previously, they would have disabled preemption while calling the kprobe handler. However, preemption would be disabled if this fault was due to a kprobe, so we know the fault was not due to a kprobe handler and can simply return failure. This behaviour was introduced in commit a980c0ef9f6d ("x86/kprobes: Refactor kprobes_fault() like kprobe_exceptions_notify()") [[email protected]: export kprobe_fault_handler()] Link: http://lkml.kernel.org/r/[email protected] Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Anshuman Khandual <[email protected]> Reviewed-by: Dave Hansen <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Matthew Wilcox <[email protected]> Cc: Mark Rutland <[email protected]> Cc: Christophe Leroy <[email protected]> Cc: Stephen Rothwell <[email protected]> Cc: Andrey Konovalov <[email protected]> Cc: Michael Ellerman <[email protected]> Cc: Paul Mackerras <[email protected]> Cc: Russell King <[email protected]> Cc: Catalin Marinas <[email protected]> Cc: Will Deacon <[email protected]> Cc: Tony Luck <[email protected]> Cc: Fenghua Yu <[email protected]> Cc: Martin Schwidefsky <[email protected]> Cc: Heiko Carstens <[email protected]> Cc: Yoshinori Sato <[email protected]> Cc: "David S. Miller" <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Vineet Gupta <[email protected]> Cc: James Hogan <[email protected]> Cc: Paul Burton <[email protected]> Cc: Ralf Baechle <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
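The unified helper looks roughly like this (sketched from the description above; see include/linux/kprobes.h for the definitive version):

  static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs,
                                                unsigned int trap)
  {
      /* a kprobe fault can only be taken non-preemptible, in kernel mode */
      if (kprobes_built_in() && !preemptible() && !user_mode(regs))
          if (kprobe_running() && kprobe_fault_handler(regs, trap))
              return true;
      return false;
  }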
2019-07-08  Merge branch 'siginfo-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace  (Linus Torvalds; 1 file, -3/+2)
Pull force_sig() argument change from Eric Biederman:

"A source of error over the years has been that force_sig has taken a task parameter when it is only safe to use force_sig with the current task. The force_sig function is built for delivering synchronous signals such as SIGSEGV where the userspace application caused a synchronous fault (such as a page fault) and the kernel responded with a signal. Because the name force_sig does not make this clear, and because the force_sig takes a task parameter the function force_sig has been abused for sending other kinds of signals over the years. Slowly those have been fixed when the oopses have been tracked down. This set of changes fixes the remaining abusers of force_sig and carefully rips out the task parameter from force_sig and friends making this kind of error almost impossible in the future"

* 'siginfo-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (27 commits)
  signal/x86: Move tsk inside of CONFIG_MEMORY_FAILURE in do_sigbus
  signal: Remove the signal number and task parameters from force_sig_info
  signal: Factor force_sig_info_to_task out of force_sig_info
  signal: Generate the siginfo in force_sig
  signal: Move the computation of force into send_signal and correct it.
  signal: Properly set TRACE_SIGNAL_LOSE_INFO in __send_signal
  signal: Remove the task parameter from force_sig_fault
  signal: Use force_sig_fault_to_task for the two calls that don't deliver to current
  signal: Explicitly call force_sig_fault on current
  signal/unicore32: Remove tsk parameter from __do_user_fault
  signal/arm: Remove tsk parameter from __do_user_fault
  signal/arm: Remove tsk parameter from ptrace_break
  signal/nds32: Remove tsk parameter from send_sigtrap
  signal/riscv: Remove tsk parameter from do_trap
  signal/sh: Remove tsk parameter from force_sig_info_fault
  signal/um: Remove task parameter from send_sigtrap
  signal/x86: Remove task parameter from send_sigtrap
  signal: Remove task parameter from force_sig_mceerr
  signal: Remove task parameter from force_sig
  signal: Remove task parameter from force_sigsegv
  ...
2019-05-30  treewide: Replace GPLv2 boilerplate/reference with SPDX - rule 152  (Thomas Gleixner; 1 file, -5/+1)
Based on 1 normalized pattern(s): this program is free software you can redistribute it and or modify it under the terms of the gnu general public license as published by the free software foundation either version 2 of the license or at your option any later version extracted by the scancode license scanner the SPDX license identifier GPL-2.0-or-later has been chosen to replace the boilerplate/reference in 3029 file(s). Signed-off-by: Thomas Gleixner <[email protected]> Reviewed-by: Allison Randal <[email protected]> Cc: [email protected] Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Greg Kroah-Hartman <[email protected]>
2019-05-29  signal: Remove the task parameter from force_sig_fault  (Eric W. Biederman; 1 file, -1/+1)
As synchronous exceptions really only make sense against the current task (otherwise how are you synchronous), remove the task parameter from force_sig_fault to make it explicit that that is what is going on. The two known exceptions that deliver a synchronous exception to a stopped ptraced task have already been changed to force_sig_fault_to_task. The callers have been changed with the following emacs regular expression (with obvious variations on the architectures that take more arguments) to avoid typos: force_sig_fault[(]\([^,]+\)[,]\([^,]+\)[,]\([^,]+\)[,]\W+current[)] -> force_sig_fault(\1,\2,\3) Signed-off-by: "Eric W. Biederman" <[email protected]>
2019-05-27  signal: Remove task parameter from force_sig_mceerr  (Eric W. Biederman; 1 file, -2/+1)
All of the callers pass current into force_sig_mceerr, so remove the task parameter to make this obvious. This also makes it clear that force_sig_mceerr passes current into force_sig_info. Signed-off-by: "Eric W. Biederman" <[email protected]>
2019-04-21  powerpc/mm: Detect bad KUAP faults  (Michael Ellerman; 1 file, -3/+22)
When KUAP is enabled we have logic to detect page faults that occur outside of a valid user access region and are blocked by the AMR. What we don't have at the moment is logic to detect a fault *within* a valid user access region, that has been incorrectly blocked by AMR. This is not meant to ever happen, but it can if we incorrectly save/restore the AMR, or if the AMR was overwritten for some other reason. Currently if that happens we assume it's just a regular fault that will be corrected by handling the fault normally, so we just return. But there is nothing the fault handling code can do to fix it, so the fault just happens again and we spin forever, leading to soft lockups. So add some logic to detect that case and WARN() if we ever see it. Arguably it should be a BUG(), but it's more polite to fail the access and let the kernel continue, rather than taking down the box. There should be no data integrity issue with failing the fault rather than BUG'ing, as we're just going to disallow an access that should have been allowed. To make the code a little easier to follow, unroll the condition at the end of bad_kernel_fault() and comment each case, before adding the call to bad_kuap_fault(). Signed-off-by: Michael Ellerman <[email protected]>
2019-04-21  powerpc: Add a framework for Kernel Userspace Access Protection  (Christophe Leroy; 1 file, -4/+15)
This patch implements a framework for Kernel Userspace Access Protection. Then subarches will have the possibility to provide their own implementation by providing setup_kuap() and allow/prevent_user_access(). Some platforms will need to know the area accessed and whether it is accessed for read, write or both. Therefore source, destination and size are handed over to the two functions. mpe: Rename to allow/prevent rather than unlock/lock, and add read/write wrappers. Drop the 32-bit code for now until we have an implementation for it. Add kuap to pt_regs for 64-bit as well as 32-bit. Don't split strings, use pr_crit_ratelimited(). Signed-off-by: Christophe Leroy <[email protected]> Signed-off-by: Russell Currey <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
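The hooks a subarch provides, per the description above (signatures sketched from the commit text, not copied from a header):

  void setup_kuap(bool disabled);
  void allow_user_access(void __user *to, const void __user *from,
                         unsigned long size);
  void prevent_user_access(void __user *to, const void __user *from,
                           unsigned long size);

  /* the read/write wrappers mentioned above layer on top, e.g.: */
  static inline void allow_read_from_user(const void __user *from,
                                          unsigned long size)
  {
      allow_user_access(NULL, from, size);
  }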
2019-04-21  powerpc: Add skeleton for Kernel Userspace Execution Prevention  (Christophe Leroy; 1 file, -5/+4)
This patch adds a skeleton for Kernel Userspace Execution Prevention. Then subarches implementing it have to define CONFIG_PPC_HAVE_KUEP and provide setup_kuep() function. Signed-off-by: Christophe Leroy <[email protected]> [mpe: Don't split strings, use pr_crit_ratelimited()] Signed-off-by: Michael Ellerman <[email protected]>
2019-01-03  Remove 'type' argument from access_ok() function  (Linus Torvalds; 1 file, -1/+1)
Nobody has actually used the type (VERIFY_READ vs VERIFY_WRITE) argument of the user address range verification function since we got rid of the old racy i386-only code to walk page tables by hand. It existed because the original 80386 would not honor the write protect bit when in kernel mode, so you had to do COW by hand before doing any user access. But we haven't supported that in a long time, and these days the 'type' argument is a purely historical artifact. A discussion about extending 'user_access_begin()' to do the range checking resulted in this patch, because there is no way we're going to move the old VERIFY_xyz interface to that model. And it's best done at the end of the merge window when I've done most of my merges, so let's just get this done once and for all. This patch was mostly done with a sed-script, with manual fix-ups for the cases that weren't of the trivial 'access_ok(VERIFY_xyz' form. There were a couple of notable cases:

 - csky still had the old "verify_area()" name as an alias.
 - the iter_iov code had magical hardcoded knowledge of the actual values of VERIFY_{READ,WRITE} (not that they mattered, since nothing really used it)
 - microblaze used the type argument for a debug printout

but other than those oddities this should be a total no-op patch. I tried to fix up all architectures, did fairly extensive grepping for access_ok() uses, and the changes are trivial, but I may have missed something. Any missed conversion should be trivially fixable, though. Signed-off-by: Linus Torvalds <[email protected]>
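The conversion is mechanical; a typical call site changes like this:

  /* before */
  if (!access_ok(VERIFY_WRITE, uaddr, size))
      return -EFAULT;

  /* after */
  if (!access_ok(uaddr, size))
      return -EFAULT;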
2018-12-27  Merge tag 'powerpc-4.21-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux  (Linus Torvalds; 1 file, -16/+33)
Pull powerpc updates from Michael Ellerman:

"Notable changes:

 - Mitigations for Spectre v2 on some Freescale (NXP) CPUs.
 - A large series adding support for pass-through of Nvidia V100 GPUs to guests on Power9.
 - Another large series to enable hardware assistance for TLB table walk on MPC8xx CPUs.
 - Some preparatory changes to our DMA code, to make way for further cleanups from Christoph.
 - Several fixes for our Transactional Memory handling discovered by fuzzing the signal return path.
 - Support for generating our system call table(s) from a text file like other architectures.
 - A fix to our page fault handler so that instead of generating a WARN_ON_ONCE, user accesses of kernel addresses instead print a ratelimited and appropriately scary warning.
 - A cosmetic change to make our unhandled page fault messages more similar to other arches and also more compact and informative.
 - Freescale updates from Scott: "Highlights include elimination of legacy clock bindings use from dts files, an 83xx watchdog handler, fixes to old dts interrupt errors, and some minor cleanup."

 And many clean-ups, reworks and minor fixes etc.

 Thanks to: Alexandre Belloni, Alexey Kardashevskiy, Andrew Donnellan, Aneesh Kumar K.V, Arnd Bergmann, Benjamin Herrenschmidt, Breno Leitao, Christian Lamparter, Christophe Leroy, Christoph Hellwig, Daniel Axtens, Darren Stevens, David Gibson, Diana Craciun, Dmitry V. Levin, Firoz Khan, Geert Uytterhoeven, Greg Kurz, Gustavo Romero, Hari Bathini, Joel Stanley, Kees Cook, Madhavan Srinivasan, Mahesh Salgaonkar, Markus Elfring, Mathieu Malaterre, Michal Suchánek, Naveen N. Rao, Nick Desaulniers, Oliver O'Halloran, Paul Mackerras, Ram Pai, Ravi Bangoria, Rob Herring, Russell Currey, Sabyasachi Gupta, Sam Bobroff, Satheesh Rajendran, Scott Wood, Segher Boessenkool, Stephen Rothwell, Tang Yuantian, Thiago Jung Bauermann, Yangtao Li, Yuantian Tang, Yue Haibing"

* tag 'powerpc-4.21-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (201 commits)
  Revert "powerpc/fsl_pci: simplify fsl_pci_dma_set_mask"
  powerpc/zImage: Also check for stdout-path
  powerpc: Fix HMIs on big-endian with CONFIG_RELOCATABLE=y
  macintosh: Use of_node_name_{eq, prefix} for node name comparisons
  ide: Use of_node_name_eq for node name comparisons
  powerpc: Use of_node_name_eq for node name comparisons
  powerpc/pseries/pmem: Convert to %pOFn instead of device_node.name
  powerpc/mm: Remove very old comment in hash-4k.h
  powerpc/pseries: Fix node leak in update_lmb_associativity_index()
  powerpc/configs/85xx: Enable CONFIG_DEBUG_KERNEL
  powerpc/dts/fsl: Fix dtc-flagged interrupt errors
  clk: qoriq: add more compatibles strings
  powerpc/fsl: Use new clockgen binding
  powerpc/83xx: handle machine check caused by watchdog timer
  powerpc/fsl-rio: fix spelling mistake "reserverd" -> "reserved"
  powerpc/fsl_pci: simplify fsl_pci_dma_set_mask
  arch/powerpc/fsl_rmu: Use dma_zalloc_coherent
  vfio_pci: Add NVIDIA GV100GL [Tesla V100 SXM2] subdriver
  vfio_pci: Allow regions to add own capabilities
  vfio_pci: Allow mapping extra regions
  ...
2018-12-21  powerpc/mm: Fix reporting of kernel execute faults on the 8xx  (Christophe Leroy; 1 file, -1/+3)
On the 8xx, no-execute is set via PPP bits in the PTE. Therefore a no-exec fault generates DSISR_PROTFAULT error bits, not DSISR_NOEXEC_OR_G. This patch adds DSISR_PROTFAULT in the test mask. Fixes: d3ca587404b3 ("powerpc/mm: Fix reporting of kernel execute faults") Signed-off-by: Christophe Leroy <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
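The resulting test looks approximately like this; the surrounding context in bad_kernel_fault() is an assumption, only the widened mask is from the commit:

  /* on the 8xx a no-exec fault reports DSISR_PROTFAULT, so test it too */
  if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
                                DSISR_PROTFAULT)))
      return true;    /* report a kernel execute fault */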
2018-12-20  powerpc/mm: Make NULL pointer dereferences explicit on bad page faults.  (Christophe Leroy; 1 file, -8/+9)
As on several other arches including x86, this patch makes it explicit that a bad page fault is a NULL pointer dereference when the fault address is lower than PAGE_SIZE. In the meantime, this patch makes all bad_page_fault() messages shorter so that they remain on one single line. And it prefixes them with "BUG: " so that they are easily grepped. Signed-off-by: Christophe Leroy <[email protected]> [mpe: Avoid pr_cont()] Signed-off-by: Michael Ellerman <[email protected]>
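As a sketch, the message selection keys off the fault address; the exact strings and format specifiers are assumptions reconstructed from the description:

  if (address < PAGE_SIZE)
      pr_alert("BUG: Kernel NULL pointer dereference at 0x%08lx\n", address);
  else
      pr_alert("BUG: Unable to handle kernel data access at 0x%08lx\n", address);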
2018-12-20  powerpc/mm/hash: Handle user access of kernel address gracefully  (Aneesh Kumar K.V; 1 file, -5/+19)
In commit 2865d08dd9ea ("powerpc/mm: Move the DSISR_PROTFAULT sanity check") we moved the protection fault access check before the vma lookup. That means we hit that WARN_ON when user space accesses a kernel address. Before that commit this was handled by find_vma() not finding a vma for the kernel address and considering that access as a bad area access. Avoid the confusing WARN_ON and convert it to a ratelimited printk. With the patch we now get:

for load:

  a.out[5997]: User access of kernel address (c00000000000dea0) - exploit attempt? (uid: 1000)
  a.out[5997]: segfault (11) at c00000000000dea0 nip 1317c0798 lr 7fff80d6441c code 1 in a.out[1317c0000+10000]
  a.out[5997]: code: 60000000 60420000 3c4c0002 38427790 4bffff20 3c4c0002 38427784 fbe1fff8
  a.out[5997]: code: f821ffc1 7c3f0b78 60000000 e9228030 <89290000> 993f002f 60000000 383f0040

for exec:

  a.out[6067]: User access of kernel address (c00000000000dea0) - exploit attempt? (uid: 1000)
  a.out[6067]: segfault (11) at c00000000000dea0 nip c00000000000dea0 lr 129d507b0 code 1
  a.out[6067]: Bad NIP, not dumping instructions.

Fixes: 2865d08dd9ea ("powerpc/mm: Move the DSISR_PROTFAULT sanity check") Signed-off-by: Aneesh Kumar K.V <[email protected]> Tested-by: Breno Leitao <[email protected]> [mpe: Don't split printk() string across lines] Signed-off-by: Michael Ellerman <[email protected]>
2018-12-17  KVM: PPC: Book3S HV: Implement functions to access quadrants 1 & 2  (Suraj Jitindar Singh; 1 file, -0/+1)
The POWER9 radix mmu has the concept of quadrants. The quadrant number is the two high bits of the effective address and determines the fully qualified address to be used for the translation. The fully qualified address consists of the effective lpid, the effective pid and the effective address. This gives 4 possible quadrants: 0, 1, 2, and 3.

When accessing these quadrants the fully qualified address is obtained as follows:

  Quadrant | Hypervisor      | Guest
  --------------------------------------------
           | EA[0:1] = 0b00  | EA[0:1] = 0b00
      0    | effLPID = 0     | effLPID = LPIDR
           | effPID  = PIDR  | effPID  = PIDR
  --------------------------------------------
           | EA[0:1] = 0b01  |
      1    | effLPID = LPIDR | Invalid Access
           | effPID  = PIDR  |
  --------------------------------------------
           | EA[0:1] = 0b10  |
      2    | effLPID = LPIDR | Invalid Access
           | effPID  = 0     |
  --------------------------------------------
           | EA[0:1] = 0b11  | EA[0:1] = 0b11
      3    | effLPID = 0     | effLPID = LPIDR
           | effPID  = 0     | effPID  = 0
  --------------------------------------------

In the guest, quadrant 3 is normally used to address the operating system, since this uses effPID=0 and effLPID=LPIDR, meaning the PID register doesn't need to be switched. Quadrant 0 is normally used to address user space, since the effLPID and effPID are taken from the corresponding registers.

In the host, quadrants 0 and 3 are used as above, however the effLPID is always 0 to address the host. Quadrants 1 and 2 can be used by the host to address guest memory using a guest effective address. Since the effLPID comes from the LPID register, the host loads the LPID of the guest it would like to access (and the PID of the process) and can perform accesses to a guest effective address. This means quadrant 1 can be used to address the guest user space and quadrant 2 can be used to address the guest operating system from the hypervisor, using a guest effective address.

Access to the quadrants can cause a Hypervisor Data Storage Interrupt (HDSI) due to being unable to perform partition scoped translation. Previously this could only be generated from a guest, and so the code path expects us to take the KVM trampoline in the interrupt handler. This is no longer the case, so we modify the handler to call bad_page_fault() to check if we were expecting this fault, so we can handle it gracefully and just return with an error code. In the hash mmu case we still raise an unknown exception, since quadrants aren't defined for the hash mmu. Signed-off-by: Suraj Jitindar Singh <[email protected]> Signed-off-by: Paul Mackerras <[email protected]>
2018-11-26  powerpc: change CONFIG_PPC_STD_MMU to CONFIG_PPC_BOOK3S  (Christophe Leroy; 1 file, -2/+2)
Today we have:

  config PPC_BOOK3S
      def_bool y
      depends on PPC_BOOK3S_32 || PPC_BOOK3S_64

  config PPC_STD_MMU
      def_bool y
      depends on PPC_BOOK3S

PPC_STD_MMU is therefore redundant with PPC_BOOK3S. Let's remove it. Signed-off-by: Christophe Leroy <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
2018-09-21  signal/powerpc: Use force_sig_fault where appropriate  (Eric W. Biederman; 1 file, -8/+1)
Reviewed-by: Stephen Rothwell <[email protected]> Signed-off-by: "Eric W. Biederman" <[email protected]>
2018-09-21  signal/powerpc: Specialize _exception_pkey for handling pkey exceptions  (Eric W. Biederman; 1 file, -1/+1)
Now that _exception no longer calls _exception_pkey, it is no longer necessary to handle any signal with any si_code. All pkey exceptions are SIGSEGV paired with SEGV_PKUERR. So just handle that case and remove the now unnecessary parameters from _exception_pkey. Reviewed-by: Stephen Rothwell <[email protected]> Signed-off-by: "Eric W. Biederman" <[email protected]>
2018-09-21  signal/powerpc: Remove pkey parameter from __bad_area_nosemaphore  (Eric W. Biederman; 1 file, -5/+4)
Now that bad_key_fault_exception no longer calls __bad_area_nosemaphore there is no reason for __bad_area_nosemaphore to handle pkeys. Reviewed-by: Stephen Rothwell <[email protected]> Signed-off-by: "Eric W. Biederman" <[email protected]>
2018-09-21  signal/powerpc: Call _exception_pkey directly from bad_key_fault_exception  (Eric W. Biederman; 1 file, -1/+11)
This removes the need for other code paths to deal with pkey exceptions. Reviewed-by: Stephen Rothwell <[email protected]> Signed-off-by: "Eric W. Biederman" <[email protected]>
2018-09-21  signal/powerpc: Remove pkey parameter from __bad_area  (Eric W. Biederman; 1 file, -5/+4)
There are no callers of __bad_area that pass in a pkey parameter so it makes no sense to take one. Reviewed-by: Stephen Rothwell <[email protected]> Signed-off-by: "Eric W. Biederman" <[email protected]>
2018-09-21  signal/powerpc: Use force_sig_mceerr as appropriate  (Eric W. Biederman; 1 file, -7/+11)
In do_sigbus isolate the mceerr signaling code and call force_sig_mceerr instead of falling through to the force_sig_info that works for all of the other signals. Reviewed-by: Stephen Rothwell <[email protected]> Signed-off-by: "Eric W. Biederman" <[email protected]>
2018-08-17  Merge branch 'akpm' (patches from Andrew)  (Linus Torvalds; 1 file, -3/+4)
Merge updates from Andrew Morton:

 - a few misc things
 - a few Y2038 fixes
 - ntfs fixes
 - arch/sh tweaks
 - ocfs2 updates
 - most of MM

* emailed patches from Andrew Morton <[email protected]>: (111 commits)
  mm/hmm.c: remove unused variables align_start and align_end
  fs/userfaultfd.c: remove redundant pointer uwq
  mm, vmacache: hash addresses based on pmd
  mm/list_lru: introduce list_lru_shrink_walk_irq()
  mm/list_lru.c: pass struct list_lru_node* as an argument to __list_lru_walk_one()
  mm/list_lru.c: move locking from __list_lru_walk_one() to its caller
  mm/list_lru.c: use list_lru_walk_one() in list_lru_walk_node()
  mm, swap: make CONFIG_THP_SWAP depend on CONFIG_SWAP
  mm/sparse: delete old sparse_init and enable new one
  mm/sparse: add new sparse_init_nid() and sparse_init()
  mm/sparse: move buffer init/fini to the common place
  mm/sparse: use the new sparse buffer functions in non-vmemmap
  mm/sparse: abstract sparse buffer allocations
  mm/hugetlb.c: don't zero 1GiB bootmem pages
  mm, page_alloc: double zone's batchsize
  mm/oom_kill.c: document oom_lock
  mm/hugetlb: remove gigantic page support for HIGHMEM
  mm, oom: remove sleep from under oom_lock
  kernel/dma: remove unsupported gfp_mask parameter from dma_alloc_from_contiguous()
  mm/cma: remove unsupported gfp_mask parameter from cma_alloc()
  ...
2018-08-17  mm: convert return type of handle_mm_fault() caller to vm_fault_t  (Souptick Joarder; 1 file, -3/+4)
Use the new return type vm_fault_t for the fault handler. For now, this is just documenting that the function returns a VM_FAULT value rather than an errno. Once all instances are converted, vm_fault_t will become a distinct type. Ref: commit 1c8f422059ae ("mm: change return type to vm_fault_t") In this patch all the callers of handle_mm_fault() are changed to return vm_fault_t type. Link: http://lkml.kernel.org/r/20180617084810.GA6730@jordon-HP-15-Notebook-PC Signed-off-by: Souptick Joarder <[email protected]> Cc: Matthew Wilcox <[email protected]> Cc: Richard Henderson <[email protected]> Cc: Tony Luck <[email protected]> Cc: Matt Turner <[email protected]> Cc: Vineet Gupta <[email protected]> Cc: Russell King <[email protected]> Cc: Catalin Marinas <[email protected]> Cc: Will Deacon <[email protected]> Cc: Richard Kuo <[email protected]> Cc: Geert Uytterhoeven <[email protected]> Cc: Michal Simek <[email protected]> Cc: James Hogan <[email protected]> Cc: Ley Foon Tan <[email protected]> Cc: Jonas Bonn <[email protected]> Cc: James E.J. Bottomley <[email protected]> Cc: Benjamin Herrenschmidt <[email protected]> Cc: Palmer Dabbelt <[email protected]> Cc: Yoshinori Sato <[email protected]> Cc: David S. Miller <[email protected]> Cc: Richard Weinberger <[email protected]> Cc: Guan Xuetao <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: "H. Peter Anvin" <[email protected]> Cc: "Levin, Alexander (Sasha Levin)" <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
2018-07-30  powerpc: remove unnecessary inclusion of asm/tlbflush.h  (Christophe Leroy; 1 file, -1/+0)
asm/tlbflush.h is only needed for:
 - using functions xxx_flush_tlb_xxx()
 - using MMU_NO_CONTEXT
 - including asm-generic/pgtable.h

Signed-off-by: Christophe Leroy <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
2018-06-07  Merge tag 'powerpc-4.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux  (Linus Torvalds; 1 file, -29/+47)
Pull powerpc updates from Michael Ellerman:

"Notable changes:

 - Support for split PMD page table lock on 64-bit Book3S (Power8/9).
 - Add support for HAVE_RELIABLE_STACKTRACE, so we properly support live patching again.
 - Add support for patching barrier_nospec in copy_from_user() and syscall entry.
 - A couple of fixes for our data breakpoints on Book3S.
 - A series from Nick optimising TLB/mm handling with the Radix MMU.
 - Numerous small cleanups to squash sparse/gcc warnings from Mathieu Malaterre.
 - Several series optimising various parts of the 32-bit code from Christophe Leroy.
 - Removal of support for two old machines, "SBC834xE" and "C2K" ("GEFanuc,C2K"), which is why the diffstat has so many deletions.

 And many other small improvements & fixes.

 There's a few out-of-area changes. Some minor ftrace changes OK'ed by Steve, and a fix to our powernv cpuidle driver. Then there's a series touching mm, x86 and fs/proc/task_mmu.c, which cleans up some details around pkey support. It was ack'ed/reviewed by Ingo & Dave and has been in next for several weeks.

 Thanks to: Akshay Adiga, Alastair D'Silva, Alexey Kardashevskiy, Al Viro, Andrew Donnellan, Aneesh Kumar K.V, Anju T Sudhakar, Arnd Bergmann, Balbir Singh, Cédric Le Goater, Christophe Leroy, Christophe Lombard, Colin Ian King, Dave Hansen, Fabio Estevam, Finn Thain, Frederic Barrat, Gautham R. Shenoy, Haren Myneni, Hari Bathini, Ingo Molnar, Jonathan Neuschäfer, Josh Poimboeuf, Kamalesh Babulal, Madhavan Srinivasan, Mahesh Salgaonkar, Mark Greer, Mathieu Malaterre, Matthew Wilcox, Michael Neuling, Michal Suchanek, Naveen N. Rao, Nicholas Piggin, Nicolai Stange, Olof Johansson, Paul Gortmaker, Paul Mackerras, Peter Rosin, Pridhiviraj Paidipeddi, Ram Pai, Rashmica Gupta, Ravi Bangoria, Russell Currey, Sam Bobroff, Samuel Mendoza-Jonas, Segher Boessenkool, Shilpasri G Bhat, Simon Guo, Souptick Joarder, Stewart Smith, Thiago Jung Bauermann, Torsten Duwe, Vaibhav Jain, Wei Yongjun, Wolfram Sang, Yisheng Xie, YueHaibing"

* tag 'powerpc-4.18-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (251 commits)
  powerpc/64s/radix: Fix missing ptesync in flush_cache_vmap
  cpuidle: powernv: Fix promotion from snooze if next state disabled
  powerpc: fix build failure by disabling attribute-alias warning in pci_32
  ocxl: Fix missing unlock on error in afu_ioctl_enable_p9_wait()
  powerpc-opal: fix spelling mistake "Uniterrupted" -> "Uninterrupted"
  powerpc: fix spelling mistake: "Usupported" -> "Unsupported"
  powerpc/pkeys: Detach execute_only key on !PROT_EXEC
  powerpc/powernv: copy/paste - Mask SO bit in CR
  powerpc: Remove core support for Marvell mv64x60 hostbridges
  powerpc/boot: Remove core support for Marvell mv64x60 hostbridges
  powerpc/boot: Remove support for Marvell mv64x60 i2c controller
  powerpc/boot: Remove support for Marvell MPSC serial controller
  powerpc/embedded6xx: Remove C2K board support
  powerpc/lib: optimise PPC32 memcmp
  powerpc/lib: optimise 32 bits __clear_user()
  powerpc/time: inline arch_vtime_task_switch()
  powerpc/Makefile: set -mcpu=860 flag for the 8xx
  powerpc: Implement csum_ipv6_magic in assembly
  powerpc/32: Optimise __csum_partial()
  powerpc/lib: Adjust .balign inside string functions for PPC32
  ...
2018-05-25  powerpc/mm: Only read faulting instruction when necessary in do_page_fault()  (Christophe Leroy; 1 file, -16/+34)
Commit a7a9dcd882a67 ("powerpc: Avoid taking a data miss on every userspace instruction miss") has shown that limiting the read of the faulting instruction to likely cases improves performance. This patch goes further in this direction by limiting the read of the faulting instruction to the only cases where it is likely needed.

On an MPC885, with the same benchmark app as in the commit referred to above, we see a reduction of about 3900 dTLB misses (approx 3%):

Before the patch:

  Performance counter stats for './fault 500' (10 runs):

      683033312   cpu-cycles         ( +- 0.03% )
         134538   dTLB-load-misses   ( +- 0.03% )
          46099   iTLB-load-misses   ( +- 0.02% )
          19681   faults             ( +- 0.02% )

    5.389747878 seconds time elapsed  ( +- 0.06% )

With the patch:

  Performance counter stats for './fault 500' (10 runs):

      682112862   cpu-cycles         ( +- 0.03% )
         130619   dTLB-load-misses   ( +- 0.03% )
          46073   iTLB-load-misses   ( +- 0.05% )
          19681   faults             ( +- 0.01% )

    5.381342641 seconds time elapsed  ( +- 0.07% )

The proper working of the huge stack expansion was tested with the following app:

  int main(int argc, char **argv)
  {
      char buf[1024 * 1025];

      sprintf(buf, "Hello world !\n");
      printf(buf);
      exit(0);
  }

Signed-off-by: Christophe Leroy <[email protected]> Reviewed-by: Nicholas Piggin <[email protected]> [mpe: Add include of pagemap.h to fix build errors] Signed-off-by: Michael Ellerman <[email protected]>
2018-05-25  powerpc/mm: Use instruction symbolic names in store_updates_sp()  (Christophe Leroy; 1 file, -13/+13)
Use symbolic names defined in asm/ppc-opcode.h instead of hardcoded values. Signed-off-by: Christophe Leroy <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
2018-04-25  signal: Ensure every siginfo we send has all bits initialized  (Eric W. Biederman; 1 file, -0/+1)
Call clear_siginfo to ensure every stack allocated siginfo is properly initialized before being passed to the signal sending functions. Note: It is not safe to depend on C initializers to initialize struct siginfo on the stack because C is allowed to skip holes when initializing a structure. The initialization of struct siginfo in tracehook_report_syscall_exit was moved from the helper user_single_step_siginfo into tracehook_report_syscall_exit itself, to make it clear that the local variable siginfo gets fully initialized. In a few cases the scope of struct siginfo has been reduced to make it clear that siginfo siginfo is not used on other paths in the function in which it is declared. Instances of using memset to initialize siginfo have been replaced with calls clear_siginfo for clarity. Signed-off-by: "Eric W. Biederman" <[email protected]>
2018-04-04  powerpc/mm/keys: Update documentation and remove unnecessary check  (Aneesh Kumar K.V; 1 file, -16/+12)
Adds more code comments. We also remove an unnecessary pkey check after we check for pkey error in this patch. Signed-off-by: Aneesh Kumar K.V <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
2018-01-21  Merge branch 'fixes' into next  (Michael Ellerman; 1 file, -1/+6)
Merge our fixes branch from the 4.15 cycle. Unusually the fixes branch saw some significant features merged, notably the RFI flush patches, so we want the code in next to be tested against that, to avoid any surprises when the two are merged. There's also some other work on the panic handling that was reverted in fixes and we now want to do properly in next, which would conflict. And we also fix a few other minor merge conflicts.
2018-01-20  powerpc: Deliver SEGV signal on pkey violation  (Ram Pai; 1 file, -12/+27)
The value of the pkey, whose protection got violated, is made available in si_pkey field of the siginfo structure. Signed-off-by: Ram Pai <[email protected]> Signed-off-by: Thiago Jung Bauermann <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
2018-01-20  powerpc: Handle exceptions caused by pkey violation  (Ram Pai; 1 file, -0/+22)
Handle Data and Instruction exceptions caused by memory protection-key. The CPU will detect the key fault if the HPTE is already programmed with the key. However if the HPTE is not hashed, a key fault will not be detected by the hardware. The software will detect pkey violation in such a case. Signed-off-by: Ram Pai <[email protected]> Signed-off-by: Thiago Jung Bauermann <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
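In the fault handler this amounts to an early software key check, roughly as follows (the helper names are assumptions taken from this patch series, not verified signatures):

  /* the key fault may not have been flagged by hardware if the HPTE
     was never hashed, so check the error bit in software */
  if (error_code & DSISR_KEYFAULT)
      return bad_key_fault_exception(regs, address,
                                     get_mm_addr_key(mm, address));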
2018-01-16  powerpc: Use the TRAP macro whenever comparing a trap number  (Benjamin Herrenschmidt; 1 file, -1/+1)
Trap numbers can have extra bits at the bottom that need to be filtered out. There are a few cases where we don't do that. It's possible that we got lucky but better safe than sorry. Signed-off-by: Benjamin Herrenschmidt <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
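The pattern being enforced, as a before/after sketch (TRAP() masks the low status bits out of regs->trap; see asm/ptrace.h for the exact mask):

  /* before: can mismatch when status bits are set in the bottom nibble */
  if (regs->trap == 0x400)
      handle_exec_fault();

  /* after: compare only the trap number proper */
  if (TRAP(regs) == 0x400)    /* instruction storage interrupt */
      handle_exec_fault();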
2018-01-02  powerpc/mm: Fix SEGV on mapped region to return SEGV_ACCERR  (John Sperbeck; 1 file, -1/+6)
The recent refactoring of the powerpc page fault handler in commit c3350602e876 ("powerpc/mm: Make bad_area* helper functions") caused access to protected memory regions to indicate SEGV_MAPERR instead of the traditional SEGV_ACCERR in the si_code field of a user-space signal handler. This can confuse debug libraries that temporarily change the protection of memory regions, and expect to use SEGV_ACCERR as an indication to restore access to a region. This commit restores the previous behavior. The following program exhibits the issue:

  $ ./repro read  || echo "FAILED"
  $ ./repro write || echo "FAILED"
  $ ./repro exec  || echo "FAILED"

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
  #include <signal.h>
  #include <sys/mman.h>
  #include <assert.h>

  static void segv_handler(int n, siginfo_t *info, void *arg)
  {
      _exit(info->si_code == SEGV_ACCERR ? 0 : 1);
  }

  int main(int argc, char **argv)
  {
      void *p = NULL;
      struct sigaction act = {
          .sa_sigaction = segv_handler,
          .sa_flags = SA_SIGINFO,
      };

      assert(argc == 2);
      p = mmap(NULL, getpagesize(),
               (strcmp(argv[1], "write") == 0) ? PROT_READ : 0,
               MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
      assert(p != MAP_FAILED);

      assert(sigaction(SIGSEGV, &act, NULL) == 0);
      if (strcmp(argv[1], "read") == 0)
          printf("%c", *(unsigned char *)p);
      else if (strcmp(argv[1], "write") == 0)
          *(unsigned char *)p = 0;
      else if (strcmp(argv[1], "exec") == 0)
          ((void (*)(void))p)();
      return 1;    /* failed to generate SEGV */
  }

Fixes: c3350602e876 ("powerpc/mm: Make bad_area* helper functions") Cc: [email protected] # v4.14+ Signed-off-by: John Sperbeck <[email protected]> Acked-by: Benjamin Herrenschmidt <[email protected]> [mpe: Add commit references in change log] Signed-off-by: Michael Ellerman <[email protected]>
2017-08-10  powerpc/8xx: Use symbolic names for DSISR bits in DSI  (Christophe Leroy; 1 file, -1/+1)
Use symbolic names for DSISR bits in DSI Signed-off-by: Christophe Leroy <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
2017-08-10  powerpc/8xx: Getting rid of remaining use of CONFIG_8xx  (Christophe Leroy; 1 file, -1/+1)
Two config options exist to define powerpc MPC8xx:
 * CONFIG_PPC_8xx
 * CONFIG_8xx

arch/powerpc/platforms/Kconfig.cputype has contained the following comment about the CONFIG_8xx item for some years:

  "# this is temp to handle compat with arch=ppc"

arch/powerpc is now the only place with remaining use of CONFIG_8xx: get rid of them. Signed-off-by: Christophe Leroy <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>