diff options
Diffstat (limited to 'tools/testing/selftests/x86')
-rw-r--r-- | tools/testing/selftests/x86/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/x86/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/x86/pkey-helpers.h | 219 | ||||
-rw-r--r-- | tools/testing/selftests/x86/protection_keys.c | 1506 |
4 files changed, 1 insertions, 1727 deletions
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore index 022a1f3b64ef..1aaef5bf119a 100644 --- a/tools/testing/selftests/x86/.gitignore +++ b/tools/testing/selftests/x86/.gitignore @@ -12,5 +12,4 @@ ldt_gdt iopl mpx-mini-test ioperm -protection_keys test_vdso diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 5d49bfec1e9a..5f16821c7f63 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ - protection_keys test_vdso test_vsyscall mov_ss_trap \ + test_vdso test_vsyscall mov_ss_trap \ syscall_arg_fault TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h deleted file mode 100644 index 254e5436bdd9..000000000000 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ /dev/null @@ -1,219 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PKEYS_HELPER_H -#define _PKEYS_HELPER_H -#define _GNU_SOURCE -#include <string.h> -#include <stdarg.h> -#include <stdio.h> -#include <stdint.h> -#include <stdbool.h> -#include <signal.h> -#include <assert.h> -#include <stdlib.h> -#include <ucontext.h> -#include <sys/mman.h> - -#define NR_PKEYS 16 -#define PKRU_BITS_PER_PKEY 2 - -#ifndef DEBUG_LEVEL -#define DEBUG_LEVEL 0 -#endif -#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 -extern int dprint_in_signal; -extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; -static inline void sigsafe_printf(const char *format, ...) -{ - va_list ap; - - if (!dprint_in_signal) { - va_start(ap, format); - vprintf(format, ap); - va_end(ap); - } else { - int ret; - /* - * No printf() functions are signal-safe. - * They deadlock easily. Write the format - * string to get some output, even if - * incomplete. - */ - ret = write(1, format, strlen(format)); - if (ret < 0) - exit(1); - } -} -#define dprintf_level(level, args...) do { \ - if (level <= DEBUG_LEVEL) \ - sigsafe_printf(args); \ -} while (0) -#define dprintf0(args...) dprintf_level(0, args) -#define dprintf1(args...) dprintf_level(1, args) -#define dprintf2(args...) dprintf_level(2, args) -#define dprintf3(args...) dprintf_level(3, args) -#define dprintf4(args...) dprintf_level(4, args) - -extern unsigned int shadow_pkru; -static inline unsigned int __rdpkru(void) -{ - unsigned int eax, edx; - unsigned int ecx = 0; - unsigned int pkru; - - asm volatile(".byte 0x0f,0x01,0xee\n\t" - : "=a" (eax), "=d" (edx) - : "c" (ecx)); - pkru = eax; - return pkru; -} - -static inline unsigned int _rdpkru(int line) -{ - unsigned int pkru = __rdpkru(); - - dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n", - line, pkru, shadow_pkru); - assert(pkru == shadow_pkru); - - return pkru; -} - -#define rdpkru() _rdpkru(__LINE__) - -static inline void __wrpkru(unsigned int pkru) -{ - unsigned int eax = pkru; - unsigned int ecx = 0; - unsigned int edx = 0; - - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); - asm volatile(".byte 0x0f,0x01,0xef\n\t" - : : "a" (eax), "c" (ecx), "d" (edx)); - assert(pkru == __rdpkru()); -} - -static inline void wrpkru(unsigned int pkru) -{ - dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru); - /* will do the shadow check for us: */ - rdpkru(); - __wrpkru(pkru); - shadow_pkru = pkru; - dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru()); -} - -/* - * These are technically racy. since something could - * change PKRU between the read and the write. - */ -static inline void __pkey_access_allow(int pkey, int do_allow) -{ - unsigned int pkru = rdpkru(); - int bit = pkey * 2; - - if (do_allow) - pkru &= (1<<bit); - else - pkru |= (1<<bit); - - dprintf4("pkru now: %08x\n", rdpkru()); - wrpkru(pkru); -} - -static inline void __pkey_write_allow(int pkey, int do_allow_write) -{ - long pkru = rdpkru(); - int bit = pkey * 2 + 1; - - if (do_allow_write) - pkru &= (1<<bit); - else - pkru |= (1<<bit); - - wrpkru(pkru); - dprintf4("pkru now: %08x\n", rdpkru()); -} - -#define PROT_PKEY0 0x10 /* protection key value (bit 0) */ -#define PROT_PKEY1 0x20 /* protection key value (bit 1) */ -#define PROT_PKEY2 0x40 /* protection key value (bit 2) */ -#define PROT_PKEY3 0x80 /* protection key value (bit 3) */ - -#define PAGE_SIZE 4096 -#define MB (1<<20) - -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile( - "cpuid;" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} - -/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */ -#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */ - -static inline int cpu_has_pku(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - - eax = 0x7; - ecx = 0x0; - __cpuid(&eax, &ebx, &ecx, &edx); - - if (!(ecx & X86_FEATURE_PKU)) { - dprintf2("cpu does not have PKU\n"); - return 0; - } - if (!(ecx & X86_FEATURE_OSPKE)) { - dprintf2("cpu does not have OSPKE\n"); - return 0; - } - return 1; -} - -#define XSTATE_PKRU_BIT (9) -#define XSTATE_PKRU 0x200 - -int pkru_xstate_offset(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - int xstate_offset; - int xstate_size; - unsigned long XSTATE_CPUID = 0xd; - int leaf; - - /* assume that XSTATE_PKRU is set in XCR0 */ - leaf = XSTATE_PKRU_BIT; - { - eax = XSTATE_CPUID; - ecx = leaf; - __cpuid(&eax, &ebx, &ecx, &edx); - - if (leaf == XSTATE_PKRU_BIT) { - xstate_offset = ebx; - xstate_size = eax; - } - } - - if (xstate_size == 0) { - printf("could not find size/offset of PKRU in xsave state\n"); - return 0; - } - - return xstate_offset; -} - -#endif /* _PKEYS_HELPER_H */ diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c deleted file mode 100644 index 480995bceefa..000000000000 --- a/tools/testing/selftests/x86/protection_keys.c +++ /dev/null @@ -1,1506 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Tests x86 Memory Protection Keys (see Documentation/core-api/protection-keys.rst) - * - * There are examples in here of: - * * how to set protection keys on memory - * * how to set/clear bits in PKRU (the rights register) - * * how to handle SEGV_PKRU signals and extract pkey-relevant - * information from the siginfo - * - * Things to add: - * make sure KSM and KSM COW breaking works - * prefault pages in at malloc, or not - * protect MPX bounds tables with protection keys? - * make sure VMA splitting/merging is working correctly - * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys - * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel - * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks - * - * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - */ -#define _GNU_SOURCE -#include <errno.h> -#include <linux/futex.h> -#include <sys/time.h> -#include <sys/syscall.h> -#include <string.h> -#include <stdio.h> -#include <stdint.h> -#include <stdbool.h> -#include <signal.h> -#include <assert.h> -#include <stdlib.h> -#include <ucontext.h> -#include <sys/mman.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <sys/ptrace.h> -#include <setjmp.h> - -#include "pkey-helpers.h" - -int iteration_nr = 1; -int test_nr; - -unsigned int shadow_pkru; - -#define HPAGE_SIZE (1UL<<21) -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) -#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) -#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) -#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) -#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to)) -#define __stringify_1(x...) #x -#define __stringify(x...) __stringify_1(x) - -#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) - -int dprint_in_signal; -char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; - -extern void abort_hooks(void); -#define pkey_assert(condition) do { \ - if (!(condition)) { \ - dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \ - __FILE__, __LINE__, \ - test_nr, iteration_nr); \ - dprintf0("errno at assert: %d", errno); \ - abort_hooks(); \ - exit(__LINE__); \ - } \ -} while (0) - -void cat_into_file(char *str, char *file) -{ - int fd = open(file, O_RDWR); - int ret; - - dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file); - /* - * these need to be raw because they are called under - * pkey_assert() - */ - if (fd < 0) { - fprintf(stderr, "error opening '%s'\n", str); - perror("error: "); - exit(__LINE__); - } - - ret = write(fd, str, strlen(str)); - if (ret != strlen(str)) { - perror("write to file failed"); - fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); - exit(__LINE__); - } - close(fd); -} - -#if CONTROL_TRACING > 0 -static int warned_tracing; -int tracing_root_ok(void) -{ - if (geteuid() != 0) { - if (!warned_tracing) - fprintf(stderr, "WARNING: not run as root, " - "can not do tracing control\n"); - warned_tracing = 1; - return 0; - } - return 1; -} -#endif - -void tracing_on(void) -{ -#if CONTROL_TRACING > 0 -#define TRACEDIR "/sys/kernel/debug/tracing" - char pidstr[32]; - - if (!tracing_root_ok()) - return; - - sprintf(pidstr, "%d", getpid()); - cat_into_file("0", TRACEDIR "/tracing_on"); - cat_into_file("\n", TRACEDIR "/trace"); - if (1) { - cat_into_file("function_graph", TRACEDIR "/current_tracer"); - cat_into_file("1", TRACEDIR "/options/funcgraph-proc"); - } else { - cat_into_file("nop", TRACEDIR "/current_tracer"); - } - cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid"); - cat_into_file("1", TRACEDIR "/tracing_on"); - dprintf1("enabled tracing\n"); -#endif -} - -void tracing_off(void) -{ -#if CONTROL_TRACING > 0 - if (!tracing_root_ok()) - return; - cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on"); -#endif -} - -void abort_hooks(void) -{ - fprintf(stderr, "running %s()...\n", __func__); - tracing_off(); -#ifdef SLEEP_ON_ABORT - sleep(SLEEP_ON_ABORT); -#endif -} - -static inline void __page_o_noops(void) -{ - /* 8-bytes of instruction * 512 bytes = 1 page */ - asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr"); -} - -/* - * This attempts to have roughly a page of instructions followed by a few - * instructions that do a write, and another page of instructions. That - * way, we are pretty sure that the write is in the second page of - * instructions and has at least a page of padding behind it. - * - * *That* lets us be sure to madvise() away the write instruction, which - * will then fault, which makes sure that the fault code handles - * execute-only memory properly. - */ -__attribute__((__aligned__(PAGE_SIZE))) -void lots_o_noops_around_write(int *write_to_me) -{ - dprintf3("running %s()\n", __func__); - __page_o_noops(); - /* Assume this happens in the second page of instructions: */ - *write_to_me = __LINE__; - /* pad out by another page: */ - __page_o_noops(); - dprintf3("%s() done\n", __func__); -} - -/* Define some kernel-like types */ -#define u8 uint8_t -#define u16 uint16_t -#define u32 uint32_t -#define u64 uint64_t - -#ifdef __i386__ - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 -#endif - -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 -#endif - -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 - -#else - -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 -#endif - -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 -#endif - -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 - -#endif - -void dump_mem(void *dumpme, int len_bytes) -{ - char *c = (void *)dumpme; - int i; - - for (i = 0; i < len_bytes; i += sizeof(u64)) { - u64 *ptr = (u64 *)(c + i); - dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr); - } -} - -/* Failed address bound checks: */ -#ifndef SEGV_BNDERR -# define SEGV_BNDERR 3 -#endif - -#ifndef SEGV_PKUERR -# define SEGV_PKUERR 4 -#endif - -static char *si_code_str(int si_code) -{ - if (si_code == SEGV_MAPERR) - return "SEGV_MAPERR"; - if (si_code == SEGV_ACCERR) - return "SEGV_ACCERR"; - if (si_code == SEGV_BNDERR) - return "SEGV_BNDERR"; - if (si_code == SEGV_PKUERR) - return "SEGV_PKUERR"; - return "UNKNOWN"; -} - -int pkru_faults; -int last_si_pkey = -1; -void signal_handler(int signum, siginfo_t *si, void *vucontext) -{ - ucontext_t *uctxt = vucontext; - int trapno; - unsigned long ip; - char *fpregs; - u32 *pkru_ptr; - u64 siginfo_pkey; - u32 *si_pkey_ptr; - int pkru_offset; - fpregset_t fpregset; - - dprint_in_signal = 1; - dprintf1(">>>>===============SIGSEGV============================\n"); - dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__, - __rdpkru(), shadow_pkru); - - trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; - ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; - fpregset = uctxt->uc_mcontext.fpregs; - fpregs = (void *)fpregset; - - dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__, - trapno, ip, si_code_str(si->si_code), si->si_code); -#ifdef __i386__ - /* - * 32-bit has some extra padding so that userspace can tell whether - * the XSTATE header is present in addition to the "legacy" FPU - * state. We just assume that it is here. - */ - fpregs += 0x70; -#endif - pkru_offset = pkru_xstate_offset(); - pkru_ptr = (void *)(&fpregs[pkru_offset]); - - dprintf1("siginfo: %p\n", si); - dprintf1(" fpregs: %p\n", fpregs); - /* - * If we got a PKRU fault, we *HAVE* to have at least one bit set in - * here. - */ - dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset()); - if (DEBUG_LEVEL > 4) - dump_mem(pkru_ptr - 128, 256); - pkey_assert(*pkru_ptr); - - if ((si->si_code == SEGV_MAPERR) || - (si->si_code == SEGV_ACCERR) || - (si->si_code == SEGV_BNDERR)) { - printf("non-PK si_code, exiting...\n"); - exit(4); - } - - si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); - dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); - dump_mem((u8 *)si_pkey_ptr - 8, 24); - siginfo_pkey = *si_pkey_ptr; - pkey_assert(siginfo_pkey < NR_PKEYS); - last_si_pkey = siginfo_pkey; - - dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); - /* need __rdpkru() version so we do not do shadow_pkru checking */ - dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); - dprintf1("pkey from siginfo: %jx\n", siginfo_pkey); - *(u64 *)pkru_ptr = 0x00000000; - dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); - pkru_faults++; - dprintf1("<<<<==================================================\n"); - dprint_in_signal = 0; -} - -int wait_all_children(void) -{ - int status; - return waitpid(-1, &status, 0); -} - -void sig_chld(int x) -{ - dprint_in_signal = 1; - dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); - dprint_in_signal = 0; -} - -void setup_sigsegv_handler(void) -{ - int r, rs; - struct sigaction newact; - struct sigaction oldact; - - /* #PF is mapped to sigsegv */ - int signum = SIGSEGV; - - newact.sa_handler = 0; - newact.sa_sigaction = signal_handler; - - /*sigset_t - signals to block while in the handler */ - /* get the old signal mask. */ - rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); - pkey_assert(rs == 0); - - /* call sa_sigaction, not sa_handler*/ - newact.sa_flags = SA_SIGINFO; - - newact.sa_restorer = 0; /* void(*)(), obsolete */ - r = sigaction(signum, &newact, &oldact); - r = sigaction(SIGALRM, &newact, &oldact); - pkey_assert(r == 0); -} - -void setup_handlers(void) -{ - signal(SIGCHLD, &sig_chld); - setup_sigsegv_handler(); -} - -pid_t fork_lazy_child(void) -{ - pid_t forkret; - - forkret = fork(); - pkey_assert(forkret >= 0); - dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); - - if (!forkret) { - /* in the child */ - while (1) { - dprintf1("child sleeping...\n"); - sleep(30); - } - } - return forkret; -} - -#ifndef PKEY_DISABLE_ACCESS -# define PKEY_DISABLE_ACCESS 0x1 -#endif - -#ifndef PKEY_DISABLE_WRITE -# define PKEY_DISABLE_WRITE 0x2 -#endif - -static u32 hw_pkey_get(int pkey, unsigned long flags) -{ - u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 pkru = __rdpkru(); - u32 shifted_pkru; - u32 masked_pkru; - - dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n", - __func__, pkey, flags, 0, 0); - dprintf2("%s() raw pkru: %x\n", __func__, pkru); - - shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY)); - dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru); - masked_pkru = shifted_pkru & mask; - dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru); - /* - * shift down the relevant bits to the lowest two, then - * mask off all the other high bits. - */ - return masked_pkru; -} - -static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) -{ - u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); - u32 old_pkru = __rdpkru(); - u32 new_pkru; - - /* make sure that 'rights' only contains the bits we expect: */ - assert(!(rights & ~mask)); - - /* copy old pkru */ - new_pkru = old_pkru; - /* mask out bits from pkey in old value: */ - new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY)); - /* OR in new bits for pkey: */ - new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY)); - - __wrpkru(new_pkru); - - dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n", - __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru); - return 0; -} - -void pkey_disable_set(int pkey, int flags) -{ - unsigned long syscall_flags = 0; - int ret; - int pkey_rights; - u32 orig_pkru = rdpkru(); - - dprintf1("START->%s(%d, 0x%x)\n", __func__, - pkey, flags); - pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - - pkey_rights = hw_pkey_get(pkey, syscall_flags); - - dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - pkey_assert(pkey_rights >= 0); - - pkey_rights |= flags; - - ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); - assert(!ret); - /*pkru and flags have the same format */ - shadow_pkru |= flags << (pkey * 2); - dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru); - - pkey_assert(ret >= 0); - - pkey_rights = hw_pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - - dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); - if (flags) - pkey_assert(rdpkru() > orig_pkru); - dprintf1("END<---%s(%d, 0x%x)\n", __func__, - pkey, flags); -} - -void pkey_disable_clear(int pkey, int flags) -{ - unsigned long syscall_flags = 0; - int ret; - int pkey_rights = hw_pkey_get(pkey, syscall_flags); - u32 orig_pkru = rdpkru(); - - pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - - dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - pkey_assert(pkey_rights >= 0); - - pkey_rights |= flags; - - ret = hw_pkey_set(pkey, pkey_rights, 0); - /* pkru and flags have the same format */ - shadow_pkru &= ~(flags << (pkey * 2)); - pkey_assert(ret >= 0); - - pkey_rights = hw_pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, - pkey, pkey, pkey_rights); - - dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); - if (flags) - assert(rdpkru() > orig_pkru); -} - -void pkey_write_allow(int pkey) -{ - pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); -} -void pkey_write_deny(int pkey) -{ - pkey_disable_set(pkey, PKEY_DISABLE_WRITE); -} -void pkey_access_allow(int pkey) -{ - pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); -} -void pkey_access_deny(int pkey) -{ - pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); -} - -int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, - unsigned long pkey) -{ - int sret; - - dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, - ptr, size, orig_prot, pkey); - - errno = 0; - sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); - if (errno) { - dprintf2("SYS_mprotect_key sret: %d\n", sret); - dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); - dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); - if (DEBUG_LEVEL >= 2) - perror("SYS_mprotect_pkey"); - } - return sret; -} - -int sys_pkey_alloc(unsigned long flags, unsigned long init_val) -{ - int ret = syscall(SYS_pkey_alloc, flags, init_val); - dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", - __func__, flags, init_val, ret, errno); - return ret; -} - -int alloc_pkey(void) -{ - int ret; - unsigned long init_val = 0x0; - - dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n", - __LINE__, __rdpkru(), shadow_pkru); - ret = sys_pkey_alloc(0, init_val); - /* - * pkey_alloc() sets PKRU, so we need to reflect it in - * shadow_pkru: - */ - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - if (ret) { - /* clear both the bits: */ - shadow_pkru &= ~(0x3 << (ret * 2)); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - /* - * move the new state in from init_val - * (remember, we cheated and init_val == pkru format) - */ - shadow_pkru |= (init_val << (ret * 2)); - } - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno); - /* for shadow checking: */ - rdpkru(); - dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", - __LINE__, ret, __rdpkru(), shadow_pkru); - return ret; -} - -int sys_pkey_free(unsigned long pkey) -{ - int ret = syscall(SYS_pkey_free, pkey); - dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); - return ret; -} - -/* - * I had a bug where pkey bits could be set by mprotect() but - * not cleared. This ensures we get lots of random bit sets - * and clears on the vma and pte pkey bits. - */ -int alloc_random_pkey(void) -{ - int max_nr_pkey_allocs; - int ret; - int i; - int alloced_pkeys[NR_PKEYS]; - int nr_alloced = 0; - int random_index; - memset(alloced_pkeys, 0, sizeof(alloced_pkeys)); - - /* allocate every possible key and make a note of which ones we got */ - max_nr_pkey_allocs = NR_PKEYS; - max_nr_pkey_allocs = 1; - for (i = 0; i < max_nr_pkey_allocs; i++) { - int new_pkey = alloc_pkey(); - if (new_pkey < 0) - break; - alloced_pkeys[nr_alloced++] = new_pkey; - } - - pkey_assert(nr_alloced > 0); - /* select a random one out of the allocated ones */ - random_index = rand() % nr_alloced; - ret = alloced_pkeys[random_index]; - /* now zero it out so we don't free it next */ - alloced_pkeys[random_index] = 0; - - /* go through the allocated ones that we did not want and free them */ - for (i = 0; i < nr_alloced; i++) { - int free_ret; - if (!alloced_pkeys[i]) - continue; - free_ret = sys_pkey_free(alloced_pkeys[i]); - pkey_assert(!free_ret); - } - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - return ret; -} - -int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, - unsigned long pkey) -{ - int nr_iterations = random() % 100; - int ret; - - while (0) { - int rpkey = alloc_random_pkey(); - ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); - dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", - ptr, size, orig_prot, pkey, ret); - if (nr_iterations-- < 0) - break; - - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - sys_pkey_free(rpkey); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - } - pkey_assert(pkey < NR_PKEYS); - - ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey); - dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n", - ptr, size, orig_prot, pkey, ret); - pkey_assert(!ret); - dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, ret, __rdpkru(), shadow_pkru); - return ret; -} - -struct pkey_malloc_record { - void *ptr; - long size; - int prot; -}; -struct pkey_malloc_record *pkey_malloc_records; -struct pkey_malloc_record *pkey_last_malloc_record; -long nr_pkey_malloc_records; -void record_pkey_malloc(void *ptr, long size, int prot) -{ - long i; - struct pkey_malloc_record *rec = NULL; - - for (i = 0; i < nr_pkey_malloc_records; i++) { - rec = &pkey_malloc_records[i]; - /* find a free record */ - if (rec) - break; - } - if (!rec) { - /* every record is full */ - size_t old_nr_records = nr_pkey_malloc_records; - size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1); - size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record); - dprintf2("new_nr_records: %zd\n", new_nr_records); - dprintf2("new_size: %zd\n", new_size); - pkey_malloc_records = realloc(pkey_malloc_records, new_size); - pkey_assert(pkey_malloc_records != NULL); - rec = &pkey_malloc_records[nr_pkey_malloc_records]; - /* - * realloc() does not initialize memory, so zero it from - * the first new record all the way to the end. - */ - for (i = 0; i < new_nr_records - old_nr_records; i++) - memset(rec + i, 0, sizeof(*rec)); - } - dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n", - (int)(rec - pkey_malloc_records), rec, ptr, size); - rec->ptr = ptr; - rec->size = size; - rec->prot = prot; - pkey_last_malloc_record = rec; - nr_pkey_malloc_records++; -} - -void free_pkey_malloc(void *ptr) -{ - long i; - int ret; - dprintf3("%s(%p)\n", __func__, ptr); - for (i = 0; i < nr_pkey_malloc_records; i++) { - struct pkey_malloc_record *rec = &pkey_malloc_records[i]; - dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n", - ptr, i, rec, rec->ptr, rec->size); - if ((ptr < rec->ptr) || - (ptr >= rec->ptr + rec->size)) - continue; - - dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n", - ptr, i, rec, rec->ptr, rec->size); - nr_pkey_malloc_records--; - ret = munmap(rec->ptr, rec->size); - dprintf3("munmap ret: %d\n", ret); - pkey_assert(!ret); - dprintf3("clearing rec->ptr, rec: %p\n", rec); - rec->ptr = NULL; - dprintf3("done clearing rec->ptr, rec: %p\n", rec); - return; - } - pkey_assert(false); -} - - -void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) -{ - void *ptr; - int ret; - - rdpkru(); - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - pkey_assert(pkey < NR_PKEYS); - ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - pkey_assert(ptr != (void *)-1); - ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); - pkey_assert(!ret); - record_pkey_malloc(ptr, size, prot); - rdpkru(); - - dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); - return ptr; -} - -void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) -{ - int ret; - void *ptr; - - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - /* - * Guarantee we can fit at least one huge page in the resulting - * allocation by allocating space for 2: - */ - size = ALIGN_UP(size, HPAGE_SIZE * 2); - ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - pkey_assert(ptr != (void *)-1); - record_pkey_malloc(ptr, size, prot); - mprotect_pkey(ptr, size, prot, pkey); - - dprintf1("unaligned ptr: %p\n", ptr); - ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE); - dprintf1(" aligned ptr: %p\n", ptr); - ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE); - dprintf1("MADV_HUGEPAGE ret: %d\n", ret); - ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED); - dprintf1("MADV_WILLNEED ret: %d\n", ret); - memset(ptr, 0, HPAGE_SIZE); - - dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr); - return ptr; -} - -int hugetlb_setup_ok; -#define GET_NR_HUGE_PAGES 10 -void setup_hugetlbfs(void) -{ - int err; - int fd; - char buf[] = "123"; - - if (geteuid() != 0) { - fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n"); - return; - } - - cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages"); - - /* - * Now go make sure that we got the pages and that they - * are 2M pages. Someone might have made 1G the default. - */ - fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY); - if (fd < 0) { - perror("opening sysfs 2M hugetlb config"); - return; - } - - /* -1 to guarantee leaving the trailing \0 */ - err = read(fd, buf, sizeof(buf)-1); - close(fd); - if (err <= 0) { - perror("reading sysfs 2M hugetlb config"); - return; - } - - if (atoi(buf) != GET_NR_HUGE_PAGES) { - fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n", - buf, GET_NR_HUGE_PAGES); - return; - } - - hugetlb_setup_ok = 1; -} - -void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) -{ - void *ptr; - int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; - - if (!hugetlb_setup_ok) - return PTR_ERR_ENOTSUP; - - dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey); - size = ALIGN_UP(size, HPAGE_SIZE * 2); - pkey_assert(pkey < NR_PKEYS); - ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0); - pkey_assert(ptr != (void *)-1); - mprotect_pkey(ptr, size, prot, pkey); - - record_pkey_malloc(ptr, size, prot); - - dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); - return ptr; -} - -void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) -{ - void *ptr; - int fd; - - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - pkey_assert(pkey < NR_PKEYS); - fd = open("/dax/foo", O_RDWR); - pkey_assert(fd >= 0); - - ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); - pkey_assert(ptr != (void *)-1); - - mprotect_pkey(ptr, size, prot, pkey); - - record_pkey_malloc(ptr, size, prot); - - dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); - close(fd); - return ptr; -} - -void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { - - malloc_pkey_with_mprotect, - malloc_pkey_anon_huge, - malloc_pkey_hugetlb -/* can not do direct with the pkey_mprotect() API: - malloc_pkey_mmap_direct, - malloc_pkey_mmap_dax, -*/ -}; - -void *malloc_pkey(long size, int prot, u16 pkey) -{ - void *ret; - static int malloc_type; - int nr_malloc_types = ARRAY_SIZE(pkey_malloc); - - pkey_assert(pkey < NR_PKEYS); - - while (1) { - pkey_assert(malloc_type < nr_malloc_types); - - ret = pkey_malloc[malloc_type](size, prot, pkey); - pkey_assert(ret != (void *)-1); - - malloc_type++; - if (malloc_type >= nr_malloc_types) - malloc_type = (random()%nr_malloc_types); - - /* try again if the malloc_type we tried is unsupported */ - if (ret == PTR_ERR_ENOTSUP) - continue; - - break; - } - - dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__, - size, prot, pkey, ret); - return ret; -} - -int last_pkru_faults; -#define UNKNOWN_PKEY -2 -void expected_pk_fault(int pkey) -{ - dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", - __func__, last_pkru_faults, pkru_faults); - dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); - pkey_assert(last_pkru_faults + 1 == pkru_faults); - - /* - * For exec-only memory, we do not know the pkey in - * advance, so skip this check. - */ - if (pkey != UNKNOWN_PKEY) - pkey_assert(last_si_pkey == pkey); - - /* - * The signal handler shold have cleared out PKRU to let the - * test program continue. We now have to restore it. - */ - if (__rdpkru() != 0) - pkey_assert(0); - - __wrpkru(shadow_pkru); - dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n", - __func__, shadow_pkru); - last_pkru_faults = pkru_faults; - last_si_pkey = -1; -} - -#define do_not_expect_pk_fault(msg) do { \ - if (last_pkru_faults != pkru_faults) \ - dprintf0("unexpected PK fault: %s\n", msg); \ - pkey_assert(last_pkru_faults == pkru_faults); \ -} while (0) - -int test_fds[10] = { -1 }; -int nr_test_fds; -void __save_test_fd(int fd) -{ - pkey_assert(fd >= 0); - pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); - test_fds[nr_test_fds] = fd; - nr_test_fds++; -} - -int get_test_read_fd(void) -{ - int test_fd = open("/etc/passwd", O_RDONLY); - __save_test_fd(test_fd); - return test_fd; -} - -void close_test_fds(void) -{ - int i; - - for (i = 0; i < nr_test_fds; i++) { - if (test_fds[i] < 0) - continue; - close(test_fds[i]); - test_fds[i] = -1; - } - nr_test_fds = 0; -} - -#define barrier() __asm__ __volatile__("": : :"memory") -__attribute__((noinline)) int read_ptr(int *ptr) -{ - /* - * Keep GCC from optimizing this away somehow - */ - barrier(); - return *ptr; -} - -void test_read_of_write_disabled_region(int *ptr, u16 pkey) -{ - int ptr_contents; - - dprintf1("disabling write access to PKEY[1], doing read\n"); - pkey_write_deny(pkey); - ptr_contents = read_ptr(ptr); - dprintf1("*ptr: %d\n", ptr_contents); - dprintf1("\n"); -} -void test_read_of_access_disabled_region(int *ptr, u16 pkey) -{ - int ptr_contents; - - dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr); - rdpkru(); - pkey_access_deny(pkey); - ptr_contents = read_ptr(ptr); - dprintf1("*ptr: %d\n", ptr_contents); - expected_pk_fault(pkey); -} -void test_write_of_write_disabled_region(int *ptr, u16 pkey) -{ - dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); - pkey_write_deny(pkey); - *ptr = __LINE__; - expected_pk_fault(pkey); -} -void test_write_of_access_disabled_region(int *ptr, u16 pkey) -{ - dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); - pkey_access_deny(pkey); - *ptr = __LINE__; - expected_pk_fault(pkey); -} -void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) -{ - int ret; - int test_fd = get_test_read_fd(); - - dprintf1("disabling access to PKEY[%02d], " - "having kernel read() to buffer\n", pkey); - pkey_access_deny(pkey); - ret = read(test_fd, ptr, 1); - dprintf1("read ret: %d\n", ret); - pkey_assert(ret); -} -void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) -{ - int ret; - int test_fd = get_test_read_fd(); - - pkey_write_deny(pkey); - ret = read(test_fd, ptr, 100); - dprintf1("read ret: %d\n", ret); - if (ret < 0 && (DEBUG_LEVEL > 0)) - perror("verbose read result (OK for this to be bad)"); - pkey_assert(ret); -} - -void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) -{ - int pipe_ret, vmsplice_ret; - struct iovec iov; - int pipe_fds[2]; - - pipe_ret = pipe(pipe_fds); - - pkey_assert(pipe_ret == 0); - dprintf1("disabling access to PKEY[%02d], " - "having kernel vmsplice from buffer\n", pkey); - pkey_access_deny(pkey); - iov.iov_base = ptr; - iov.iov_len = PAGE_SIZE; - vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT); - dprintf1("vmsplice() ret: %d\n", vmsplice_ret); - pkey_assert(vmsplice_ret == -1); - - close(pipe_fds[0]); - close(pipe_fds[1]); -} - -void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) -{ - int ignored = 0xdada; - int futex_ret; - int some_int = __LINE__; - - dprintf1("disabling write to PKEY[%02d], " - "doing futex gunk in buffer\n", pkey); - *ptr = some_int; - pkey_write_deny(pkey); - futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL, - &ignored, ignored); - if (DEBUG_LEVEL > 0) - perror("futex"); - dprintf1("futex() ret: %d\n", futex_ret); -} - -/* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) -{ - int err; - int i; - - /* Note: 0 is the default pkey, so don't mess with it */ - for (i = 1; i < NR_PKEYS; i++) { - if (pkey == i) - continue; - - dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i); - err = sys_pkey_free(i); - pkey_assert(err); - - err = sys_pkey_free(i); - pkey_assert(err); - - err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i); - pkey_assert(err); - } -} - -/* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) -{ - int err; - int bad_pkey = NR_PKEYS+99; - - /* pass a known-invalid pkey in: */ - err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey); - pkey_assert(err); -} - -void become_child(void) -{ - pid_t forkret; - - forkret = fork(); - pkey_assert(forkret >= 0); - dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); - - if (!forkret) { - /* in the child */ - return; - } - exit(0); -} - -/* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_alloc_exhaust(int *ptr, u16 pkey) -{ - int err; - int allocated_pkeys[NR_PKEYS] = {0}; - int nr_allocated_pkeys = 0; - int i; - - for (i = 0; i < NR_PKEYS*3; i++) { - int new_pkey; - dprintf1("%s() alloc loop: %d\n", __func__, i); - new_pkey = alloc_pkey(); - dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__, - __LINE__, err, __rdpkru(), shadow_pkru); - rdpkru(); /* for shadow checking */ - dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC); - if ((new_pkey == -1) && (errno == ENOSPC)) { - dprintf2("%s() failed to allocate pkey after %d tries\n", - __func__, nr_allocated_pkeys); - } else { - /* - * Ensure the number of successes never - * exceeds the number of keys supported - * in the hardware. - */ - pkey_assert(nr_allocated_pkeys < NR_PKEYS); - allocated_pkeys[nr_allocated_pkeys++] = new_pkey; - } - - /* - * Make sure that allocation state is properly - * preserved across fork(). - */ - if (i == NR_PKEYS*2) - become_child(); - } - - dprintf3("%s()::%d\n", __func__, __LINE__); - - /* - * There are 16 pkeys supported in hardware. Three are - * allocated by the time we get here: - * 1. The default key (0) - * 2. One possibly consumed by an execute-only mapping. - * 3. One allocated by the test code and passed in via - * 'pkey' to this function. - * Ensure that we can allocate at least another 13 (16-3). - */ - pkey_assert(i >= NR_PKEYS-3); - - for (i = 0; i < nr_allocated_pkeys; i++) { - err = sys_pkey_free(allocated_pkeys[i]); - pkey_assert(!err); - rdpkru(); /* for shadow checking */ - } -} - -/* - * pkey 0 is special. It is allocated by default, so you do not - * have to call pkey_alloc() to use it first. Make sure that it - * is usable. - */ -void test_mprotect_with_pkey_0(int *ptr, u16 pkey) -{ - long size; - int prot; - - assert(pkey_last_malloc_record); - size = pkey_last_malloc_record->size; - /* - * This is a bit of a hack. But mprotect() requires - * huge-page-aligned sizes when operating on hugetlbfs. - * So, make sure that we use something that's a multiple - * of a huge page when we can. - */ - if (size >= HPAGE_SIZE) - size = HPAGE_SIZE; - prot = pkey_last_malloc_record->prot; - - /* Use pkey 0 */ - mprotect_pkey(ptr, size, prot, 0); - - /* Make sure that we can set it back to the original pkey. */ - mprotect_pkey(ptr, size, prot, pkey); -} - -void test_ptrace_of_child(int *ptr, u16 pkey) -{ - __attribute__((__unused__)) int peek_result; - pid_t child_pid; - void *ignored = 0; - long ret; - int status; - /* - * This is the "control" for our little expermient. Make sure - * we can always access it when ptracing. - */ - int *plain_ptr_unaligned = malloc(HPAGE_SIZE); - int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE); - - /* - * Fork a child which is an exact copy of this process, of course. - * That means we can do all of our tests via ptrace() and then plain - * memory access and ensure they work differently. - */ - child_pid = fork_lazy_child(); - dprintf1("[%d] child pid: %d\n", getpid(), child_pid); - - ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored); - if (ret) - perror("attach"); - dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__); - pkey_assert(ret != -1); - ret = waitpid(child_pid, &status, WUNTRACED); - if ((ret != child_pid) || !(WIFSTOPPED(status))) { - fprintf(stderr, "weird waitpid result %ld stat %x\n", - ret, status); - pkey_assert(0); - } - dprintf2("waitpid ret: %ld\n", ret); - dprintf2("waitpid status: %d\n", status); - - pkey_access_deny(pkey); - pkey_write_deny(pkey); - - /* Write access, untested for now: - ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data); - pkey_assert(ret != -1); - dprintf1("poke at %p: %ld\n", peek_at, ret); - */ - - /* - * Try to access the pkey-protected "ptr" via ptrace: - */ - ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored); - /* expect it to work, without an error: */ - pkey_assert(ret != -1); - /* Now access from the current task, and expect an exception: */ - peek_result = read_ptr(ptr); - expected_pk_fault(pkey); - - /* - * Try to access the NON-pkey-protected "plain_ptr" via ptrace: - */ - ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored); - /* expect it to work, without an error: */ - pkey_assert(ret != -1); - /* Now access from the current task, and expect NO exception: */ - peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault("read plain pointer after ptrace"); - - ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); - pkey_assert(ret != -1); - - ret = kill(child_pid, SIGKILL); - pkey_assert(ret != -1); - - wait(&status); - - free(plain_ptr_unaligned); -} - -void *get_pointer_to_instructions(void) -{ - void *p1; - - p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); - dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); - /* lots_o_noops_around_write should be page-aligned already */ - assert(p1 == &lots_o_noops_around_write); - - /* Point 'p1' at the *second* page of the function: */ - p1 += PAGE_SIZE; - - /* - * Try to ensure we fault this in on next touch to ensure - * we get an instruction fault as opposed to a data one - */ - madvise(p1, PAGE_SIZE, MADV_DONTNEED); - - return p1; -} - -void test_executing_on_unreadable_memory(int *ptr, u16 pkey) -{ - void *p1; - int scratch; - int ptr_contents; - int ret; - - p1 = get_pointer_to_instructions(); - lots_o_noops_around_write(&scratch); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - - ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey); - pkey_assert(!ret); - pkey_access_deny(pkey); - - dprintf2("pkru: %x\n", rdpkru()); - - /* - * Make sure this is an *instruction* fault - */ - madvise(p1, PAGE_SIZE, MADV_DONTNEED); - lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault("executing on PROT_EXEC memory"); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pk_fault(pkey); -} - -void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) -{ - void *p1; - int scratch; - int ptr_contents; - int ret; - - dprintf1("%s() start\n", __func__); - - p1 = get_pointer_to_instructions(); - lots_o_noops_around_write(&scratch); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - - /* Use a *normal* mprotect(), not mprotect_pkey(): */ - ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); - pkey_assert(!ret); - - dprintf2("pkru: %x\n", rdpkru()); - - /* Make sure this is an *instruction* fault */ - madvise(p1, PAGE_SIZE, MADV_DONTNEED); - lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault("executing on PROT_EXEC memory"); - ptr_contents = read_ptr(p1); - dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); - expected_pk_fault(UNKNOWN_PKEY); - - /* - * Put the memory back to non-PROT_EXEC. Should clear the - * exec-only pkey off the VMA and allow it to be readable - * again. Go to PROT_NONE first to check for a kernel bug - * that did not clear the pkey when doing PROT_NONE. - */ - ret = mprotect(p1, PAGE_SIZE, PROT_NONE); - pkey_assert(!ret); - - ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); - pkey_assert(!ret); - ptr_contents = read_ptr(p1); - do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); -} - -void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) -{ - int size = PAGE_SIZE; - int sret; - - if (cpu_has_pku()) { - dprintf1("SKIP: %s: no CPU support\n", __func__); - return; - } - - sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); - pkey_assert(sret < 0); -} - -void (*pkey_tests[])(int *ptr, u16 pkey) = { - test_read_of_write_disabled_region, - test_read_of_access_disabled_region, - test_write_of_write_disabled_region, - test_write_of_access_disabled_region, - test_kernel_write_of_access_disabled_region, - test_kernel_write_of_write_disabled_region, - test_kernel_gup_of_access_disabled_region, - test_kernel_gup_write_to_write_disabled_region, - test_executing_on_unreadable_memory, - test_implicit_mprotect_exec_only_memory, - test_mprotect_with_pkey_0, - test_ptrace_of_child, - test_pkey_syscalls_on_non_allocated_pkey, - test_pkey_syscalls_bad_args, - test_pkey_alloc_exhaust, -}; - -void run_tests_once(void) -{ - int *ptr; - int prot = PROT_READ|PROT_WRITE; - - for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) { - int pkey; - int orig_pkru_faults = pkru_faults; - - dprintf1("======================\n"); - dprintf1("test %d preparing...\n", test_nr); - - tracing_on(); - pkey = alloc_random_pkey(); - dprintf1("test %d starting with pkey: %d\n", test_nr, pkey); - ptr = malloc_pkey(PAGE_SIZE, prot, pkey); - dprintf1("test %d starting...\n", test_nr); - pkey_tests[test_nr](ptr, pkey); - dprintf1("freeing test memory: %p\n", ptr); - free_pkey_malloc(ptr); - sys_pkey_free(pkey); - - dprintf1("pkru_faults: %d\n", pkru_faults); - dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults); - - tracing_off(); - close_test_fds(); - - printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr); - dprintf1("======================\n\n"); - } - iteration_nr++; -} - -void pkey_setup_shadow(void) -{ - shadow_pkru = __rdpkru(); -} - -int main(void) -{ - int nr_iterations = 22; - - setup_handlers(); - - printf("has pku: %d\n", cpu_has_pku()); - - if (!cpu_has_pku()) { - int size = PAGE_SIZE; - int *ptr; - - printf("running PKEY tests for unsupported CPU/OS\n"); - - ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - assert(ptr != (void *)-1); - test_mprotect_pkey_on_unsupported_cpu(ptr, 1); - exit(0); - } - - pkey_setup_shadow(); - printf("startup pkru: %x\n", rdpkru()); - setup_hugetlbfs(); - - while (nr_iterations-- > 0) - run_tests_once(); - - printf("done (all tests OK)\n"); - return 0; -} |