aboutsummaryrefslogtreecommitdiff
path: root/tools/testing/selftests/mm
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/mm')
-rw-r--r--tools/testing/selftests/mm/.gitignore4
-rw-r--r--tools/testing/selftests/mm/Makefile32
-rw-r--r--tools/testing/selftests/mm/guard-pages.c1243
-rw-r--r--tools/testing/selftests/mm/hugetlb_dio.c21
-rw-r--r--tools/testing/selftests/mm/hugetlb_fault_after_madv.c48
-rw-r--r--tools/testing/selftests/mm/page_frag/Makefile18
-rw-r--r--tools/testing/selftests/mm/page_frag/page_frag_test.c198
-rw-r--r--tools/testing/selftests/mm/pkey-arm64.h3
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h7
-rw-r--r--tools/testing/selftests/mm/pkey-x86.h2
-rw-r--r--tools/testing/selftests/mm/pkey_sighandler_tests.c115
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh18
-rwxr-xr-xtools/testing/selftests/mm/test_page_frag.sh175
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c4
14 files changed, 1837 insertions, 51 deletions
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index da030b43e43b..8f01f4da1c0d 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -51,3 +51,7 @@ hugetlb_madv_vs_map
mseal_test
seal_elf
droppable
+hugetlb_dio
+pkey_sighandler_tests_32
+pkey_sighandler_tests_64
+guard-pages
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 02e1204971b0..3de23ea4663f 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -36,6 +36,17 @@ MAKEFLAGS += --no-builtin-rules
CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
LDLIBS = -lrt -lpthread -lm
+KDIR ?= /lib/modules/$(shell uname -r)/build
+ifneq (,$(wildcard $(KDIR)/Module.symvers))
+ifneq (,$(wildcard $(KDIR)/include/linux/page_frag_cache.h))
+TEST_GEN_MODS_DIR := page_frag
+else
+PAGE_FRAG_WARNING = "missing page_frag_cache.h, please use a newer kernel"
+endif
+else
+PAGE_FRAG_WARNING = "missing Module.symvers, please have the kernel built first"
+endif
+
TEST_GEN_FILES = cow
TEST_GEN_FILES += compaction_test
TEST_GEN_FILES += gup_longterm
@@ -79,6 +90,7 @@ TEST_GEN_FILES += hugetlb_fault_after_madv
TEST_GEN_FILES += hugetlb_madv_vs_map
TEST_GEN_FILES += hugetlb_dio
TEST_GEN_FILES += droppable
+TEST_GEN_FILES += guard-pages
ifneq ($(ARCH),arm64)
TEST_GEN_FILES += soft-dirty
@@ -105,17 +117,19 @@ endif
ifeq ($(CAN_BUILD_X86_64),1)
TEST_GEN_FILES += $(BINARIES_64)
endif
-else
-ifneq (,$(filter $(ARCH),arm64 powerpc))
+else ifeq ($(ARCH),arm64)
+TEST_GEN_FILES += protection_keys
+TEST_GEN_FILES += pkey_sighandler_tests
+else ifeq ($(ARCH),powerpc)
TEST_GEN_FILES += protection_keys
-endif
-
endif
ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390))
TEST_GEN_FILES += va_high_addr_switch
+ifneq ($(ARCH),riscv64)
TEST_GEN_FILES += virtual_address_range
+endif
TEST_GEN_FILES += write_to_hugetlbfs
endif
@@ -126,6 +140,7 @@ TEST_FILES += test_hmm.sh
TEST_FILES += va_high_addr_switch.sh
TEST_FILES += charge_reserved_hugetlb.sh
TEST_FILES += hugetlb_reparenting_test.sh
+TEST_FILES += test_page_frag.sh
# required by charge_reserved_hugetlb.sh
TEST_FILES += write_hugetlb_memory.sh
@@ -211,3 +226,12 @@ warn_missing_liburing:
echo "Warning: missing liburing support. Some tests will be skipped." ; \
echo
endif
+
+ifneq ($(PAGE_FRAG_WARNING),)
+all: warn_missing_page_frag
+
+warn_missing_page_frag:
+ @echo ; \
+ echo "Warning: $(PAGE_FRAG_WARNING). page_frag test will be skipped." ; \
+ echo
+endif
diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-pages.c
new file mode 100644
index 000000000000..7cdf815d0d63
--- /dev/null
+++ b/tools/testing/selftests/mm/guard-pages.c
@@ -0,0 +1,1243 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include "../kselftest_harness.h"
+#include <asm-generic/mman.h> /* Force the import of the tools version. */
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/userfaultfd.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+/*
+ * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1:
+ *
+ * "If the signal occurs other than as the result of calling the abort or raise
+ * function, the behavior is undefined if the signal handler refers to any
+ * object with static storage duration other than by assigning a value to an
+ * object declared as volatile sig_atomic_t"
+ */
+static volatile sig_atomic_t signal_jump_set;
+static sigjmp_buf signal_jmp_buf;
+
+/*
+ * Ignore the checkpatch warning, we must read from x but don't want to do
+ * anything with it in order to trigger a read page fault. We therefore must use
+ * volatile to stop the compiler from optimising this away.
+ */
+#define FORCE_READ(x) (*(volatile typeof(x) *)x)
+
+static int userfaultfd(int flags)
+{
+ return syscall(SYS_userfaultfd, flags);
+}
+
+static void handle_fatal(int c)
+{
+ if (!signal_jump_set)
+ return;
+
+ siglongjmp(signal_jmp_buf, c);
+}
+
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+ return syscall(SYS_pidfd_open, pid, flags);
+}
+
+/*
+ * Enable our signal catcher and try to read/write the specified buffer. The
+ * return value indicates whether the read/write succeeds without a fatal
+ * signal.
+ */
+static bool try_access_buf(char *ptr, bool write)
+{
+ bool failed;
+
+ /* Tell signal handler to jump back here on fatal signal. */
+ signal_jump_set = true;
+ /* If a fatal signal arose, we will jump back here and failed is set. */
+ failed = sigsetjmp(signal_jmp_buf, 0) != 0;
+
+ if (!failed) {
+ if (write)
+ *ptr = 'x';
+ else
+ FORCE_READ(ptr);
+ }
+
+ signal_jump_set = false;
+ return !failed;
+}
+
+/* Try and read from a buffer, return true if no fatal signal. */
+static bool try_read_buf(char *ptr)
+{
+ return try_access_buf(ptr, false);
+}
+
+/* Try and write to a buffer, return true if no fatal signal. */
+static bool try_write_buf(char *ptr)
+{
+ return try_access_buf(ptr, true);
+}
+
+/*
+ * Try and BOTH read from AND write to a buffer, return true if BOTH operations
+ * succeed.
+ */
+static bool try_read_write_buf(char *ptr)
+{
+ return try_read_buf(ptr) && try_write_buf(ptr);
+}
+
+FIXTURE(guard_pages)
+{
+ unsigned long page_size;
+};
+
+FIXTURE_SETUP(guard_pages)
+{
+ struct sigaction act = {
+ .sa_handler = &handle_fatal,
+ .sa_flags = SA_NODEFER,
+ };
+
+ sigemptyset(&act.sa_mask);
+ if (sigaction(SIGSEGV, &act, NULL))
+ ksft_exit_fail_perror("sigaction");
+
+ self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
+};
+
+FIXTURE_TEARDOWN(guard_pages)
+{
+ struct sigaction act = {
+ .sa_handler = SIG_DFL,
+ .sa_flags = SA_NODEFER,
+ };
+
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGSEGV, &act, NULL);
+}
+
+TEST_F(guard_pages, basic)
+{
+ const unsigned long NUM_PAGES = 10;
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ ptr = mmap(NULL, NUM_PAGES * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Trivially assert we can touch the first page. */
+ ASSERT_TRUE(try_read_write_buf(ptr));
+
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Establish that 1st page SIGSEGV's. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+
+ /* Ensure we can touch everything else.*/
+ for (i = 1; i < NUM_PAGES; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Establish a guard page at the end of the mapping. */
+ ASSERT_EQ(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size,
+ MADV_GUARD_INSTALL), 0);
+
+ /* Check that both guard pages result in SIGSEGV. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size]));
+
+ /* Remove the first guard page. */
+ ASSERT_FALSE(madvise(ptr, page_size, MADV_GUARD_REMOVE));
+
+ /* Make sure we can touch it. */
+ ASSERT_TRUE(try_read_write_buf(ptr));
+
+ /* Remove the last guard page. */
+ ASSERT_FALSE(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size,
+ MADV_GUARD_REMOVE));
+
+ /* Make sure we can touch it. */
+ ASSERT_TRUE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size]));
+
+ /*
+ * Test setting a _range_ of pages, namely the first 3. The first of
+ * these be faulted in, so this also tests that we can install guard
+ * pages over backed pages.
+ */
+ ASSERT_EQ(madvise(ptr, 3 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure they are all guard pages. */
+ for (i = 0; i < 3; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Make sure the rest are not. */
+ for (i = 3; i < NUM_PAGES; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Remove guard pages. */
+ ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Now make sure we can touch everything. */
+ for (i = 0; i < NUM_PAGES; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /*
+ * Now remove all guard pages, make sure we don't remove existing
+ * entries.
+ */
+ ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0);
+
+ for (i = 0; i < NUM_PAGES * page_size; i += page_size) {
+ char chr = ptr[i];
+
+ ASSERT_EQ(chr, 'x');
+ }
+
+ ASSERT_EQ(munmap(ptr, NUM_PAGES * page_size), 0);
+}
+
+/* Assert that operations applied across multiple VMAs work as expected. */
+TEST_F(guard_pages, multi_vma)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr_region, *ptr, *ptr1, *ptr2, *ptr3;
+ int i;
+
+ /* Reserve a 100 page region over which we can install VMAs. */
+ ptr_region = mmap(NULL, 100 * page_size, PROT_NONE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr_region, MAP_FAILED);
+
+ /* Place a VMA of 10 pages size at the start of the region. */
+ ptr1 = mmap(ptr_region, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr1, MAP_FAILED);
+
+ /* Place a VMA of 5 pages size 50 pages into the region. */
+ ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+
+ /* Place a VMA of 20 pages size at the end of the region. */
+ ptr3 = mmap(&ptr_region[80 * page_size], 20 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr3, MAP_FAILED);
+
+ /* Unmap gaps. */
+ ASSERT_EQ(munmap(&ptr_region[10 * page_size], 40 * page_size), 0);
+ ASSERT_EQ(munmap(&ptr_region[55 * page_size], 25 * page_size), 0);
+
+ /*
+ * We end up with VMAs like this:
+ *
+ * 0 10 .. 50 55 .. 80 100
+ * [---] [---] [---]
+ */
+
+ /*
+ * Now mark the whole range as guard pages and make sure all VMAs are as
+ * such.
+ */
+
+ /*
+ * madvise() is certifiable and lets you perform operations over gaps,
+ * everything works, but it indicates an error and errno is set to
+ * -ENOMEM. Also if anything runs out of memory it is set to
+ * -ENOMEM. You are meant to guess which is which.
+ */
+ ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), -1);
+ ASSERT_EQ(errno, ENOMEM);
+
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr1[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ for (i = 0; i < 5; i++) {
+ char *curr = &ptr2[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ for (i = 0; i < 20; i++) {
+ char *curr = &ptr3[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now remove guar pages over range and assert the opposite. */
+
+ ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), -1);
+ ASSERT_EQ(errno, ENOMEM);
+
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr1[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ for (i = 0; i < 5; i++) {
+ char *curr = &ptr2[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ for (i = 0; i < 20; i++) {
+ char *curr = &ptr3[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Now map incompatible VMAs in the gaps. */
+ ptr = mmap(&ptr_region[10 * page_size], 40 * page_size,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ ptr = mmap(&ptr_region[55 * page_size], 25 * page_size,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * We end up with VMAs like this:
+ *
+ * 0 10 .. 50 55 .. 80 100
+ * [---][xxxx][---][xxxx][---]
+ *
+ * Where 'x' signifies VMAs that cannot be merged with those adjacent to
+ * them.
+ */
+
+ /* Multiple VMAs adjacent to one another should result in no error. */
+ ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), 0);
+ for (i = 0; i < 100; i++) {
+ char *curr = &ptr_region[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+ ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), 0);
+ for (i = 0; i < 100; i++) {
+ char *curr = &ptr_region[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr_region, 100 * page_size), 0);
+}
+
+/*
+ * Assert that batched operations performed using process_madvise() work as
+ * expected.
+ */
+TEST_F(guard_pages, process_madvise)
+{
+ const unsigned long page_size = self->page_size;
+ pid_t pid = getpid();
+ int pidfd = pidfd_open(pid, 0);
+ char *ptr_region, *ptr1, *ptr2, *ptr3;
+ ssize_t count;
+ struct iovec vec[6];
+
+ ASSERT_NE(pidfd, -1);
+
+ /* Reserve region to map over. */
+ ptr_region = mmap(NULL, 100 * page_size, PROT_NONE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr_region, MAP_FAILED);
+
+ /*
+ * 10 pages offset 1 page into reserve region. We MAP_POPULATE so we
+ * overwrite existing entries and test this code path against
+ * overwriting existing entries.
+ */
+ ptr1 = mmap(&ptr_region[page_size], 10 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE | MAP_POPULATE, -1, 0);
+ ASSERT_NE(ptr1, MAP_FAILED);
+ /* We want guard markers at start/end of each VMA. */
+ vec[0].iov_base = ptr1;
+ vec[0].iov_len = page_size;
+ vec[1].iov_base = &ptr1[9 * page_size];
+ vec[1].iov_len = page_size;
+
+ /* 5 pages offset 50 pages into reserve region. */
+ ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr2, MAP_FAILED);
+ vec[2].iov_base = ptr2;
+ vec[2].iov_len = page_size;
+ vec[3].iov_base = &ptr2[4 * page_size];
+ vec[3].iov_len = page_size;
+
+ /* 20 pages offset 79 pages into reserve region. */
+ ptr3 = mmap(&ptr_region[79 * page_size], 20 * page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr3, MAP_FAILED);
+ vec[4].iov_base = ptr3;
+ vec[4].iov_len = page_size;
+ vec[5].iov_base = &ptr3[19 * page_size];
+ vec[5].iov_len = page_size;
+
+ /* Free surrounding VMAs. */
+ ASSERT_EQ(munmap(ptr_region, page_size), 0);
+ ASSERT_EQ(munmap(&ptr_region[11 * page_size], 39 * page_size), 0);
+ ASSERT_EQ(munmap(&ptr_region[55 * page_size], 24 * page_size), 0);
+ ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0);
+
+ /* Now guard in one step. */
+ count = process_madvise(pidfd, vec, 6, MADV_GUARD_INSTALL, 0);
+
+ /* OK we don't have permission to do this, skip. */
+ if (count == -1 && errno == EPERM)
+ ksft_exit_skip("No process_madvise() permissions, try running as root.\n");
+
+ /* Returns the number of bytes advised. */
+ ASSERT_EQ(count, 6 * page_size);
+
+ /* Now make sure the guarding was applied. */
+
+ ASSERT_FALSE(try_read_write_buf(ptr1));
+ ASSERT_FALSE(try_read_write_buf(&ptr1[9 * page_size]));
+
+ ASSERT_FALSE(try_read_write_buf(ptr2));
+ ASSERT_FALSE(try_read_write_buf(&ptr2[4 * page_size]));
+
+ ASSERT_FALSE(try_read_write_buf(ptr3));
+ ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size]));
+
+ /* Now do the same with unguard... */
+ count = process_madvise(pidfd, vec, 6, MADV_GUARD_REMOVE, 0);
+
+ /* ...and everything should now succeed. */
+
+ ASSERT_TRUE(try_read_write_buf(ptr1));
+ ASSERT_TRUE(try_read_write_buf(&ptr1[9 * page_size]));
+
+ ASSERT_TRUE(try_read_write_buf(ptr2));
+ ASSERT_TRUE(try_read_write_buf(&ptr2[4 * page_size]));
+
+ ASSERT_TRUE(try_read_write_buf(ptr3));
+ ASSERT_TRUE(try_read_write_buf(&ptr3[19 * page_size]));
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr1, 10 * page_size), 0);
+ ASSERT_EQ(munmap(ptr2, 5 * page_size), 0);
+ ASSERT_EQ(munmap(ptr3, 20 * page_size), 0);
+ close(pidfd);
+}
+
+/* Assert that unmapping ranges does not leave guard markers behind. */
+TEST_F(guard_pages, munmap)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr_new1, *ptr_new2;
+
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard first and last pages. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ ASSERT_EQ(madvise(&ptr[9 * page_size], page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Assert that they are guarded. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[9 * page_size]));
+
+ /* Unmap them. */
+ ASSERT_EQ(munmap(ptr, page_size), 0);
+ ASSERT_EQ(munmap(&ptr[9 * page_size], page_size), 0);
+
+ /* Map over them.*/
+ ptr_new1 = mmap(ptr, page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr_new1, MAP_FAILED);
+ ptr_new2 = mmap(&ptr[9 * page_size], page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr_new2, MAP_FAILED);
+
+ /* Assert that they are now not guarded. */
+ ASSERT_TRUE(try_read_write_buf(ptr_new1));
+ ASSERT_TRUE(try_read_write_buf(ptr_new2));
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Assert that mprotect() operations have no bearing on guard markers. */
+TEST_F(guard_pages, mprotect)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard the middle of the range. */
+ ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size,
+ MADV_GUARD_INSTALL), 0);
+
+ /* Assert that it is indeed guarded. */
+ ASSERT_FALSE(try_read_write_buf(&ptr[5 * page_size]));
+ ASSERT_FALSE(try_read_write_buf(&ptr[6 * page_size]));
+
+ /* Now make these pages read-only. */
+ ASSERT_EQ(mprotect(&ptr[5 * page_size], 2 * page_size, PROT_READ), 0);
+
+ /* Make sure the range is still guarded. */
+ ASSERT_FALSE(try_read_buf(&ptr[5 * page_size]));
+ ASSERT_FALSE(try_read_buf(&ptr[6 * page_size]));
+
+ /* Make sure we can guard again without issue.*/
+ ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size,
+ MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the range is, yet again, still guarded. */
+ ASSERT_FALSE(try_read_buf(&ptr[5 * page_size]));
+ ASSERT_FALSE(try_read_buf(&ptr[6 * page_size]));
+
+ /* Now unguard the whole range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Make sure the whole range is readable. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Split and merge VMAs and make sure guard pages still behave. */
+TEST_F(guard_pages, split_merge)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr_new;
+ int i;
+
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard the whole range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the whole range is guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now unmap some pages in the range so we split. */
+ ASSERT_EQ(munmap(&ptr[2 * page_size], page_size), 0);
+ ASSERT_EQ(munmap(&ptr[5 * page_size], page_size), 0);
+ ASSERT_EQ(munmap(&ptr[8 * page_size], page_size), 0);
+
+ /* Make sure the remaining ranges are guarded post-split. */
+ for (i = 0; i < 2; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+ for (i = 2; i < 5; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+ for (i = 6; i < 8; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+ for (i = 9; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now map them again - the unmap will have cleared the guards. */
+ ptr_new = mmap(&ptr[2 * page_size], page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+ ptr_new = mmap(&ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+ ptr_new = mmap(&ptr[8 * page_size], page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+
+ /* Now make sure guard pages are established. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+ bool expect_true = i == 2 || i == 5 || i == 8;
+
+ ASSERT_TRUE(expect_true ? result : !result);
+ }
+
+ /* Now guard everything again. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the whole range is guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now split the range into three. */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* Make sure the whole range is guarded for read. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_buf(curr));
+ }
+
+ /* Now reset protection bits so we merge the whole thing. */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0);
+ ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size,
+ PROT_READ | PROT_WRITE), 0);
+
+ /* Make sure the whole range is still guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Split range into 3 again... */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0);
+ ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0);
+
+ /* ...and unguard the whole range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Make sure the whole range is remedied for read. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_buf(curr));
+ }
+
+ /* Merge them again. */
+ ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0);
+ ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size,
+ PROT_READ | PROT_WRITE), 0);
+
+ /* Now ensure the merged range is remedied for read/write. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Assert that MADV_DONTNEED does not remove guard markers. */
+TEST_F(guard_pages, dontneed)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Back the whole range. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ *curr = 'y';
+ }
+
+ /* Guard every other page. */
+ for (i = 0; i < 10; i += 2) {
+ char *curr = &ptr[i * page_size];
+ int res = madvise(curr, page_size, MADV_GUARD_INSTALL);
+
+ ASSERT_EQ(res, 0);
+ }
+
+ /* Indicate that we don't need any of the range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_DONTNEED), 0);
+
+ /* Check to ensure guard markers are still in place. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_buf(curr);
+
+ if (i % 2 == 0) {
+ ASSERT_FALSE(result);
+ } else {
+ ASSERT_TRUE(result);
+ /* Make sure we really did get reset to zero page. */
+ ASSERT_EQ(*curr, '\0');
+ }
+
+ /* Now write... */
+ result = try_write_buf(&ptr[i * page_size]);
+
+ /* ...and make sure same result. */
+ ASSERT_TRUE(i % 2 != 0 ? result : !result);
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Assert that mlock()'ed pages work correctly with guard markers. */
+TEST_F(guard_pages, mlock)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Populate. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ *curr = 'y';
+ }
+
+ /* Lock. */
+ ASSERT_EQ(mlock(ptr, 10 * page_size), 0);
+
+ /* Now try to guard, should fail with EINVAL. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), -1);
+ ASSERT_EQ(errno, EINVAL);
+
+ /* OK unlock. */
+ ASSERT_EQ(munlock(ptr, 10 * page_size), 0);
+
+ /* Guard first half of range, should now succeed. */
+ ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure guard works. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+
+ if (i < 5) {
+ ASSERT_FALSE(result);
+ } else {
+ ASSERT_TRUE(result);
+ ASSERT_EQ(*curr, 'x');
+ }
+ }
+
+ /*
+ * Now lock the latter part of the range. We can't lock the guard pages,
+ * as this would result in the pages being populated and the guarding
+ * would cause this to error out.
+ */
+ ASSERT_EQ(mlock(&ptr[5 * page_size], 5 * page_size), 0);
+
+ /*
+ * Now remove guard pages, we permit mlock()'d ranges to have guard
+ * pages removed as it is a non-destructive operation.
+ */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Now check that no guard pages remain. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert that moving, extending and shrinking memory via mremap() retains
+ * guard markers where possible.
+ *
+ * - Moving a mapping alone should retain markers as they are.
+ */
+TEST_F(guard_pages, mremap_move)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr_new;
+
+ /* Map 5 pages. */
+ ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Place guard markers at both ends of the 5 page span. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the guard pages are in effect. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /* Map a new region we will move this range into. Doing this ensures
+ * that we have reserved a range to map into.
+ */
+ ptr_new = mmap(NULL, 5 * page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE,
+ -1, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+
+ ASSERT_EQ(mremap(ptr, 5 * page_size, 5 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new), ptr_new);
+
+ /* Make sure the guard markers are retained. */
+ ASSERT_FALSE(try_read_write_buf(ptr_new));
+ ASSERT_FALSE(try_read_write_buf(&ptr_new[4 * page_size]));
+
+ /*
+ * Clean up - we only need reference the new pointer as we overwrote the
+ * PROT_NONE range and moved the existing one.
+ */
+ munmap(ptr_new, 5 * page_size);
+}
+
+/*
+ * Assert that moving, extending and shrinking memory via mremap() retains
+ * guard markers where possible.
+ *
+ * Expanding should retain guard pages, only now in different position. The user
+ * will have to remove guard pages manually to fix up (they'd have to do the
+ * same if it were a PROT_NONE mapping).
+ */
+TEST_F(guard_pages, mremap_expand)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr, *ptr_new;
+
+ /* Map 10 pages... */
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ /* ...But unmap the last 5 so we can ensure we can expand into them. */
+ ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0);
+
+ /* Place guard markers at both ends of the 5 page span. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the guarding is in effect. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /* Now expand to 10 pages. */
+ ptr = mremap(ptr, 5 * page_size, 10 * page_size, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Make sure the guard markers are retained in their original positions.
+ */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /* Reserve a region which we can move to and expand into. */
+ ptr_new = mmap(NULL, 20 * page_size, PROT_NONE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr_new, MAP_FAILED);
+
+ /* Now move and expand into it. */
+ ptr = mremap(ptr, 10 * page_size, 20 * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new);
+ ASSERT_EQ(ptr, ptr_new);
+
+ /*
+ * Again, make sure the guard markers are retained in their original positions.
+ */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /*
+ * A real user would have to remove guard markers, but would reasonably
+ * expect all characteristics of the mapping to be retained, including
+ * guard markers.
+ */
+
+ /* Cleanup. */
+ munmap(ptr, 20 * page_size);
+}
+/*
+ * Assert that moving, extending and shrinking memory via mremap() retains
+ * guard markers where possible.
+ *
+ * Shrinking will result in markers that are shrunk over being removed. Again,
+ * if the user were using a PROT_NONE mapping they'd have to manually fix this
+ * up also so this is OK.
+ */
+TEST_F(guard_pages, mremap_shrink)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ /* Map 5 pages. */
+ ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Place guard markers at both ends of the 5 page span. */
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Make sure the guarding is in effect. */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+ ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
+
+ /* Now shrink to 3 pages. */
+ ptr = mremap(ptr, 5 * page_size, 3 * page_size, MREMAP_MAYMOVE);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* We expect the guard marker at the start to be retained... */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+
+ /* ...But remaining pages will not have guard markers. */
+ for (i = 1; i < 3; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /*
+ * As with expansion, a real user would have to remove guard pages and
+ * fixup. But you'd have to do similar manual things with PROT_NONE
+ * mappings too.
+ */
+
+ /*
+ * If we expand back to the original size, the end marker will, of
+ * course, no longer be present.
+ */
+ ptr = mremap(ptr, 3 * page_size, 5 * page_size, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Again, we expect the guard marker at the start to be retained... */
+ ASSERT_FALSE(try_read_write_buf(ptr));
+
+ /* ...But remaining pages will not have guard markers. */
+ for (i = 1; i < 5; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ munmap(ptr, 5 * page_size);
+}
+
+/*
+ * Assert that forking a process with VMAs that do not have VM_WIPEONFORK set
+ * retain guard pages.
+ */
+TEST_F(guard_pages, fork)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ pid_t pid;
+ int i;
+
+ /* Map 10 pages. */
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Establish guard apges in the first 5 pages. */
+ ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+ if (!pid) {
+ /* This is the child process now. */
+
+ /* Assert that the guarding is in effect. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+
+ ASSERT_TRUE(i >= 5 ? result : !result);
+ }
+
+ /* Now unguard the range.*/
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ exit(0);
+ }
+
+ /* Parent process. */
+
+ /* Parent simply waits on child. */
+ waitpid(pid, NULL, 0);
+
+ /* Child unguard does not impact parent page table state. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+
+ ASSERT_TRUE(i >= 5 ? result : !result);
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert that forking a process with VMAs that do have VM_WIPEONFORK set
+ * behave as expected.
+ */
+TEST_F(guard_pages, fork_wipeonfork)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ pid_t pid;
+ int i;
+
+ /* Map 10 pages. */
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Mark wipe on fork. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_WIPEONFORK), 0);
+
+ /* Guard the first 5 pages. */
+ ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0);
+
+ pid = fork();
+ ASSERT_NE(pid, -1);
+ if (!pid) {
+ /* This is the child process now. */
+
+ /* Guard will have been wiped. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_write_buf(curr));
+ }
+
+ exit(0);
+ }
+
+ /* Parent process. */
+
+ waitpid(pid, NULL, 0);
+
+ /* Guard markers should be in effect.*/
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+ bool result = try_read_write_buf(curr);
+
+ ASSERT_TRUE(i >= 5 ? result : !result);
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Ensure that MADV_FREE retains guard entries as expected. */
+TEST_F(guard_pages, lazyfree)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ /* Map 10 pages. */
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Ensure guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Lazyfree range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_FREE), 0);
+
+ /* This should leave the guard markers in place. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Ensure that MADV_POPULATE_READ, MADV_POPULATE_WRITE behave as expected. */
+TEST_F(guard_pages, populate)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+
+ /* Map 10 pages. */
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Populate read should error out... */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_READ), -1);
+ ASSERT_EQ(errno, EFAULT);
+
+ /* ...as should populate write. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_WRITE), -1);
+ ASSERT_EQ(errno, EFAULT);
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Ensure that MADV_COLD, MADV_PAGEOUT do not remove guard markers. */
+TEST_F(guard_pages, cold_pageout)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ /* Map 10 pages. */
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Guard range. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* Ensured guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Now mark cold. This should have no impact on guard markers. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_COLD), 0);
+
+ /* Should remain guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* OK, now page out. This should equally, have no effect on markers. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0);
+
+ /* Should remain guarded. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/* Ensure that guard pages do not break userfaultd. */
+TEST_F(guard_pages, uffd)
+{
+ const unsigned long page_size = self->page_size;
+ int uffd;
+ char *ptr;
+ int i;
+ struct uffdio_api api = {
+ .api = UFFD_API,
+ .features = 0,
+ };
+ struct uffdio_register reg;
+ struct uffdio_range range;
+
+ /* Set up uffd. */
+ uffd = userfaultfd(0);
+ if (uffd == -1 && errno == EPERM)
+ ksft_exit_skip("No userfaultfd permissions, try running as root.\n");
+ ASSERT_NE(uffd, -1);
+
+ ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0);
+
+ /* Map 10 pages. */
+ ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANON | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Register the range with uffd. */
+ range.start = (unsigned long)ptr;
+ range.len = 10 * page_size;
+ reg.range = range;
+ reg.mode = UFFDIO_REGISTER_MODE_MISSING;
+ ASSERT_EQ(ioctl(uffd, UFFDIO_REGISTER, &reg), 0);
+
+ /* Guard the range. This should not trigger the uffd. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /* The guarding should behave as usual with no uffd intervention. */
+ for (i = 0; i < 10; i++) {
+ char *curr = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_read_write_buf(curr));
+ }
+
+ /* Cleanup. */
+ ASSERT_EQ(ioctl(uffd, UFFDIO_UNREGISTER, &range), 0);
+ close(uffd);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c
index 60001c142ce9..db63abe5ee5e 100644
--- a/tools/testing/selftests/mm/hugetlb_dio.c
+++ b/tools/testing/selftests/mm/hugetlb_dio.c
@@ -44,6 +44,13 @@ void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off)
if (fd < 0)
ksft_exit_fail_perror("Error opening file\n");
+ /* Get the free huge pages before allocation */
+ free_hpage_b = get_free_hugepages();
+ if (free_hpage_b == 0) {
+ close(fd);
+ ksft_exit_skip("No free hugepage, exiting!\n");
+ }
+
/* Allocate a hugetlb page */
orig_buffer = mmap(NULL, h_pagesize, mmap_prot, mmap_flags, -1, 0);
if (orig_buffer == MAP_FAILED) {
@@ -69,19 +76,15 @@ void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off)
/* Get the free huge pages after unmap*/
free_hpage_a = get_free_hugepages();
+ ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
+ ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
+
/*
* If the no. of free hugepages before allocation and after unmap does
* not match - that means there could still be a page which is pinned.
*/
- if (free_hpage_a != free_hpage_b) {
- ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
- ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
- ksft_test_result_fail(": Huge pages not freed!\n");
- } else {
- ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
- ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
- ksft_test_result_pass(": Huge pages freed successfully !\n");
- }
+ ksft_test_result(free_hpage_a == free_hpage_b,
+ "free huge pages from %u-%u\n", start_off, end_off);
}
int main(void)
diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
index 73b81c632366..e2640529dbb2 100644
--- a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
+++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
@@ -5,20 +5,36 @@
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>
+#include <setjmp.h>
+#include <signal.h>
#include "vm_util.h"
#include "../kselftest.h"
-#define MMAP_SIZE (1 << 21)
#define INLOOP_ITER 100
-char *huge_ptr;
+static char *huge_ptr;
+static size_t huge_page_size;
+
+static sigjmp_buf sigbuf;
+static bool sigbus_triggered;
+
+static void signal_handler(int signal)
+{
+ if (signal == SIGBUS) {
+ sigbus_triggered = true;
+ siglongjmp(sigbuf, 1);
+ }
+}
/* Touch the memory while it is being madvised() */
void *touch(void *unused)
{
char *ptr = (char *)huge_ptr;
+ if (sigsetjmp(sigbuf, 1))
+ return NULL;
+
for (int i = 0; i < INLOOP_ITER; i++)
ptr[0] = '.';
@@ -30,7 +46,7 @@ void *madv(void *unused)
usleep(rand() % 10);
for (int i = 0; i < INLOOP_ITER; i++)
- madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED);
+ madvise(huge_ptr, huge_page_size, MADV_DONTNEED);
return NULL;
}
@@ -44,9 +60,23 @@ int main(void)
* interactions
*/
int max = 10000;
+ int err;
+
+ ksft_print_header();
+ ksft_set_plan(1);
srand(getpid());
+ if (signal(SIGBUS, signal_handler) == SIG_ERR)
+ ksft_exit_skip("Could not register signal handler.");
+
+ huge_page_size = default_huge_page_size();
+ if (!huge_page_size)
+ ksft_exit_skip("Could not detect default hugetlb page size.");
+
+ ksft_print_msg("[INFO] detected default hugetlb page size: %zu KiB\n",
+ huge_page_size / 1024);
+
free_hugepages = get_free_hugepages();
if (free_hugepages != 1) {
ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n",
@@ -54,7 +84,7 @@ int main(void)
}
while (max--) {
- huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ huge_ptr = mmap(NULL, huge_page_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-1, 0);
@@ -66,8 +96,14 @@ int main(void)
pthread_join(thread1, NULL);
pthread_join(thread2, NULL);
- munmap(huge_ptr, MMAP_SIZE);
+ munmap(huge_ptr, huge_page_size);
}
- return KSFT_PASS;
+ ksft_test_result(!sigbus_triggered, "SIGBUS behavior\n");
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/mm/page_frag/Makefile b/tools/testing/selftests/mm/page_frag/Makefile
new file mode 100644
index 000000000000..8c8bb39ffa28
--- /dev/null
+++ b/tools/testing/selftests/mm/page_frag/Makefile
@@ -0,0 +1,18 @@
+PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = page_frag_test.ko
+
+obj-m += page_frag_test.o
+
+all:
+ +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules
+
+clean:
+ +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean
diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c b/tools/testing/selftests/mm/page_frag/page_frag_test.c
new file mode 100644
index 000000000000..e806c1866e36
--- /dev/null
+++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Test module for page_frag cache
+ *
+ * Copyright (C) 2024 Yunsheng Lin <[email protected]>
+ */
+
+#include <linux/module.h>
+#include <linux/cpumask.h>
+#include <linux/completion.h>
+#include <linux/ptr_ring.h>
+#include <linux/kthread.h>
+#include <linux/page_frag_cache.h>
+
+#define TEST_FAILED_PREFIX "page_frag_test failed: "
+
+static struct ptr_ring ptr_ring;
+static int nr_objs = 512;
+static atomic_t nthreads;
+static struct completion wait;
+static struct page_frag_cache test_nc;
+static int test_popped;
+static int test_pushed;
+static bool force_exit;
+
+static int nr_test = 2000000;
+module_param(nr_test, int, 0);
+MODULE_PARM_DESC(nr_test, "number of iterations to test");
+
+static bool test_align;
+module_param(test_align, bool, 0);
+MODULE_PARM_DESC(test_align, "use align API for testing");
+
+static int test_alloc_len = 2048;
+module_param(test_alloc_len, int, 0);
+MODULE_PARM_DESC(test_alloc_len, "alloc len for testing");
+
+static int test_push_cpu;
+module_param(test_push_cpu, int, 0);
+MODULE_PARM_DESC(test_push_cpu, "test cpu for pushing fragment");
+
+static int test_pop_cpu;
+module_param(test_pop_cpu, int, 0);
+MODULE_PARM_DESC(test_pop_cpu, "test cpu for popping fragment");
+
+static int page_frag_pop_thread(void *arg)
+{
+ struct ptr_ring *ring = arg;
+
+ pr_info("page_frag pop test thread begins on cpu %d\n",
+ smp_processor_id());
+
+ while (test_popped < nr_test) {
+ void *obj = __ptr_ring_consume(ring);
+
+ if (obj) {
+ test_popped++;
+ page_frag_free(obj);
+ } else {
+ if (force_exit)
+ break;
+
+ cond_resched();
+ }
+ }
+
+ if (atomic_dec_and_test(&nthreads))
+ complete(&wait);
+
+ pr_info("page_frag pop test thread exits on cpu %d\n",
+ smp_processor_id());
+
+ return 0;
+}
+
+static int page_frag_push_thread(void *arg)
+{
+ struct ptr_ring *ring = arg;
+
+ pr_info("page_frag push test thread begins on cpu %d\n",
+ smp_processor_id());
+
+ while (test_pushed < nr_test && !force_exit) {
+ void *va;
+ int ret;
+
+ if (test_align) {
+ va = page_frag_alloc_align(&test_nc, test_alloc_len,
+ GFP_KERNEL, SMP_CACHE_BYTES);
+
+ if ((unsigned long)va & (SMP_CACHE_BYTES - 1)) {
+ force_exit = true;
+ WARN_ONCE(true, TEST_FAILED_PREFIX "unaligned va returned\n");
+ }
+ } else {
+ va = page_frag_alloc(&test_nc, test_alloc_len, GFP_KERNEL);
+ }
+
+ if (!va)
+ continue;
+
+ ret = __ptr_ring_produce(ring, va);
+ if (ret) {
+ page_frag_free(va);
+ cond_resched();
+ } else {
+ test_pushed++;
+ }
+ }
+
+ pr_info("page_frag push test thread exits on cpu %d\n",
+ smp_processor_id());
+
+ if (atomic_dec_and_test(&nthreads))
+ complete(&wait);
+
+ return 0;
+}
+
+static int __init page_frag_test_init(void)
+{
+ struct task_struct *tsk_push, *tsk_pop;
+ int last_pushed = 0, last_popped = 0;
+ ktime_t start;
+ u64 duration;
+ int ret;
+
+ page_frag_cache_init(&test_nc);
+ atomic_set(&nthreads, 2);
+ init_completion(&wait);
+
+ if (test_alloc_len > PAGE_SIZE || test_alloc_len <= 0 ||
+ !cpu_active(test_push_cpu) || !cpu_active(test_pop_cpu))
+ return -EINVAL;
+
+ ret = ptr_ring_init(&ptr_ring, nr_objs, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ tsk_push = kthread_create_on_cpu(page_frag_push_thread, &ptr_ring,
+ test_push_cpu, "page_frag_push");
+ if (IS_ERR(tsk_push))
+ return PTR_ERR(tsk_push);
+
+ tsk_pop = kthread_create_on_cpu(page_frag_pop_thread, &ptr_ring,
+ test_pop_cpu, "page_frag_pop");
+ if (IS_ERR(tsk_pop)) {
+ kthread_stop(tsk_push);
+ return PTR_ERR(tsk_pop);
+ }
+
+ start = ktime_get();
+ wake_up_process(tsk_push);
+ wake_up_process(tsk_pop);
+
+ pr_info("waiting for test to complete\n");
+
+ while (!wait_for_completion_timeout(&wait, msecs_to_jiffies(10000))) {
+ /* exit if there is no progress for push or pop size */
+ if (last_pushed == test_pushed || last_popped == test_popped) {
+ WARN_ONCE(true, TEST_FAILED_PREFIX "no progress\n");
+ force_exit = true;
+ continue;
+ }
+
+ last_pushed = test_pushed;
+ last_popped = test_popped;
+ pr_info("page_frag_test progress: pushed = %d, popped = %d\n",
+ test_pushed, test_popped);
+ }
+
+ if (force_exit) {
+ pr_err(TEST_FAILED_PREFIX "exit with error\n");
+ goto out;
+ }
+
+ duration = (u64)ktime_us_delta(ktime_get(), start);
+ pr_info("%d of iterations for %s testing took: %lluus\n", nr_test,
+ test_align ? "aligned" : "non-aligned", duration);
+
+out:
+ ptr_ring_cleanup(&ptr_ring, NULL);
+ page_frag_cache_drain(&test_nc);
+
+ return -EAGAIN;
+}
+
+static void __exit page_frag_test_exit(void)
+{
+}
+
+module_init(page_frag_test_init);
+module_exit(page_frag_test_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Yunsheng Lin <[email protected]>");
+MODULE_DESCRIPTION("Test module for page_frag");
diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h
index 580e1b0bb38e..d9d2100eafc0 100644
--- a/tools/testing/selftests/mm/pkey-arm64.h
+++ b/tools/testing/selftests/mm/pkey-arm64.h
@@ -31,6 +31,7 @@
#define NR_RESERVED_PKEYS 1 /* pkey-0 */
#define PKEY_ALLOW_ALL 0x77777777
+#define PKEY_REG_ALLOW_NONE 0x0
#define PKEY_BITS_PER_PKEY 4
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
@@ -126,7 +127,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey)
return 0;
}
-static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey)
+static inline void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey)
{
struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt);
struct poe_context *poe_ctx =
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index 9ab6a3ee153b..f7cfe163b0ff 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -112,6 +112,13 @@ void record_pkey_malloc(void *ptr, long size, int prot);
#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
#endif
+/*
+ * FIXME: Remove once the generic PKEY_UNRESTRICTED definition is merged.
+ */
+#ifndef PKEY_UNRESTRICTED
+#define PKEY_UNRESTRICTED 0x0
+#endif
+
#ifndef set_pkey_bits
static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
{
diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h
index 5f28e26a2511..ac91777c8917 100644
--- a/tools/testing/selftests/mm/pkey-x86.h
+++ b/tools/testing/selftests/mm/pkey-x86.h
@@ -34,6 +34,8 @@
#define PAGE_SIZE 4096
#define MB (1<<20)
+#define PKEY_REG_ALLOW_NONE 0x55555555
+
static inline void __page_o_noops(void)
{
/* 8-bytes of instruction * 512 bytes = 1 page */
diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c
index a8088b645ad6..c593a426341c 100644
--- a/tools/testing/selftests/mm/pkey_sighandler_tests.c
+++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c
@@ -11,6 +11,7 @@
*/
#define _GNU_SOURCE
#define __SANE_USERSPACE_TYPES__
+#include <linux/mman.h>
#include <errno.h>
#include <sys/syscall.h>
#include <string.h>
@@ -59,12 +60,58 @@ long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6)
: "=a"(ret)
: "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5)
: "memory");
+#elif defined __aarch64__
+ register long x0 asm("x0") = a1;
+ register long x1 asm("x1") = a2;
+ register long x2 asm("x2") = a3;
+ register long x3 asm("x3") = a4;
+ register long x4 asm("x4") = a5;
+ register long x5 asm("x5") = a6;
+ register long x8 asm("x8") = n;
+ asm volatile ("svc #0"
+ : "=r"(x0)
+ : "r"(x0), "r"(x1), "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(x8)
+ : "memory");
+ ret = x0;
#else
# error syscall_raw() not implemented
#endif
return ret;
}
+static inline long clone_raw(unsigned long flags, void *stack,
+ int *parent_tid, int *child_tid)
+{
+ long a1 = flags;
+ long a2 = (long)stack;
+ long a3 = (long)parent_tid;
+#if defined(__x86_64__) || defined(__i386)
+ long a4 = (long)child_tid;
+ long a5 = 0;
+#elif defined(__aarch64__)
+ long a4 = 0;
+ long a5 = (long)child_tid;
+#else
+# error clone_raw() not implemented
+#endif
+
+ return syscall_raw(SYS_clone, a1, a2, a3, a4, a5, 0);
+}
+
+/*
+ * Returns the most restrictive pkey register value that can be used by the
+ * tests.
+ */
+static inline u64 pkey_reg_restrictive_default(void)
+{
+ /*
+ * Disallow everything except execution on pkey 0, so that each caller
+ * doesn't need to enable it explicitly (the selftest code runs with
+ * its code mapped with pkey 0).
+ */
+ return set_pkey_bits(PKEY_REG_ALLOW_NONE, 0, PKEY_DISABLE_ACCESS);
+}
+
static void sigsegv_handler(int signo, siginfo_t *info, void *ucontext)
{
pthread_mutex_lock(&mutex);
@@ -113,7 +160,7 @@ static void raise_sigusr2(void)
static void *thread_segv_with_pkey0_disabled(void *ptr)
{
/* Disable MPK 0 (and all others too) */
- __write_pkey_reg(0x55555555);
+ __write_pkey_reg(pkey_reg_restrictive_default());
/* Segfault (with SEGV_MAPERR) */
*(int *) (0x1) = 1;
@@ -123,7 +170,7 @@ static void *thread_segv_with_pkey0_disabled(void *ptr)
static void *thread_segv_pkuerr_stack(void *ptr)
{
/* Disable MPK 0 (and all others too) */
- __write_pkey_reg(0x55555555);
+ __write_pkey_reg(pkey_reg_restrictive_default());
/* After we disable MPK 0, we can't access the stack to return */
return NULL;
@@ -133,6 +180,7 @@ static void *thread_segv_maperr_ptr(void *ptr)
{
stack_t *stack = ptr;
int *bad = (int *)1;
+ u64 pkey_reg;
/*
* Setup alternate signal stack, which should be pkey_mprotect()ed by
@@ -142,7 +190,9 @@ static void *thread_segv_maperr_ptr(void *ptr)
syscall_raw(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0);
/* Disable MPK 0. Only MPK 1 is enabled. */
- __write_pkey_reg(0x55555551);
+ pkey_reg = pkey_reg_restrictive_default();
+ pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED);
+ __write_pkey_reg(pkey_reg);
/* Segfault */
*bad = 1;
@@ -240,6 +290,7 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
int pkey;
int parent_pid = 0;
int child_pid = 0;
+ u64 pkey_reg;
sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
@@ -257,7 +308,10 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
assert(stack != MAP_FAILED);
/* Allow access to MPK 0 and MPK 1 */
- __write_pkey_reg(0x55555550);
+ pkey_reg = pkey_reg_restrictive_default();
+ pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED);
+ pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED);
+ __write_pkey_reg(pkey_reg);
/* Protect the new stack with MPK 1 */
pkey = pkey_alloc(0, 0);
@@ -272,14 +326,13 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
memset(&siginfo, 0, sizeof(siginfo));
/* Use clone to avoid newer glibcs using rseq on new threads */
- long ret = syscall_raw(SYS_clone,
- CLONE_VM | CLONE_FS | CLONE_FILES |
- CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
- CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
- CLONE_DETACHED,
- (long) ((char *)(stack) + STACK_SIZE),
- (long) &parent_pid,
- (long) &child_pid, 0, 0);
+ long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES |
+ CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
+ CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
+ CLONE_DETACHED,
+ stack + STACK_SIZE,
+ &parent_pid,
+ &child_pid);
if (ret < 0) {
errno = -ret;
@@ -307,7 +360,13 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void)
static void test_pkru_preserved_after_sigusr1(void)
{
struct sigaction sa;
- unsigned long pkru = 0x45454544;
+ u64 pkey_reg;
+
+ /* Allow access to MPK 0 and an arbitrary set of keys */
+ pkey_reg = pkey_reg_restrictive_default();
+ pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED);
+ pkey_reg = set_pkey_bits(pkey_reg, 3, PKEY_UNRESTRICTED);
+ pkey_reg = set_pkey_bits(pkey_reg, 7, PKEY_UNRESTRICTED);
sa.sa_flags = SA_SIGINFO;
@@ -320,7 +379,7 @@ static void test_pkru_preserved_after_sigusr1(void)
memset(&siginfo, 0, sizeof(siginfo));
- __write_pkey_reg(pkru);
+ __write_pkey_reg(pkey_reg);
raise(SIGUSR1);
@@ -330,7 +389,7 @@ static void test_pkru_preserved_after_sigusr1(void)
pthread_mutex_unlock(&mutex);
/* Ensure the pkru value is the same after returning from signal. */
- ksft_test_result(pkru == __read_pkey_reg() &&
+ ksft_test_result(pkey_reg == __read_pkey_reg() &&
siginfo.si_signo == SIGUSR1,
"%s\n", __func__);
}
@@ -347,6 +406,7 @@ static noinline void *thread_sigusr2_self(void *ptr)
'S', 'I', 'G', 'U', 'S', 'R', '2',
'.', '.', '.', '\n', '\0'};
stack_t *stack = ptr;
+ u64 pkey_reg;
/*
* Setup alternate signal stack, which should be pkey_mprotect()ed by
@@ -356,7 +416,9 @@ static noinline void *thread_sigusr2_self(void *ptr)
syscall(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0);
/* Disable MPK 0. Only MPK 2 is enabled. */
- __write_pkey_reg(0x55555545);
+ pkey_reg = pkey_reg_restrictive_default();
+ pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED);
+ __write_pkey_reg(pkey_reg);
raise_sigusr2();
@@ -384,6 +446,7 @@ static void test_pkru_sigreturn(void)
int pkey;
int parent_pid = 0;
int child_pid = 0;
+ u64 pkey_reg;
sa.sa_handler = SIG_DFL;
sa.sa_flags = 0;
@@ -418,7 +481,10 @@ static void test_pkru_sigreturn(void)
* the current thread's stack is protected by the default MPK 0. Hence
* both need to be enabled.
*/
- __write_pkey_reg(0x55555544);
+ pkey_reg = pkey_reg_restrictive_default();
+ pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED);
+ pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED);
+ __write_pkey_reg(pkey_reg);
/* Protect the stack with MPK 2 */
pkey = pkey_alloc(0, 0);
@@ -431,14 +497,13 @@ static void test_pkru_sigreturn(void)
sigstack.ss_size = STACK_SIZE;
/* Use clone to avoid newer glibcs using rseq on new threads */
- long ret = syscall_raw(SYS_clone,
- CLONE_VM | CLONE_FS | CLONE_FILES |
- CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
- CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
- CLONE_DETACHED,
- (long) ((char *)(stack) + STACK_SIZE),
- (long) &parent_pid,
- (long) &child_pid, 0, 0);
+ long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES |
+ CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
+ CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID |
+ CLONE_DETACHED,
+ stack + STACK_SIZE,
+ &parent_pid,
+ &child_pid);
if (ret < 0) {
errno = -ret;
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index c5797ad1d37b..2fc290d9430c 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -75,6 +75,8 @@ separated by spaces:
read-only VMAs
- mdwe
test prctl(PR_SET_MDWE, ...)
+- page_frag
+ test handling of page fragment allocation and freeing
example: ./run_vmtests.sh -t "hmm mmap ksm"
EOF
@@ -347,10 +349,12 @@ if [ $VADDR64 -ne 0 ]; then
# allows high virtual address allocation requests independent
# of platform's physical memory.
- prev_policy=$(cat /proc/sys/vm/overcommit_memory)
- echo 1 > /proc/sys/vm/overcommit_memory
- CATEGORY="hugevm" run_test ./virtual_address_range
- echo $prev_policy > /proc/sys/vm/overcommit_memory
+ if [ -x ./virtual_address_range ]; then
+ prev_policy=$(cat /proc/sys/vm/overcommit_memory)
+ echo 1 > /proc/sys/vm/overcommit_memory
+ CATEGORY="hugevm" run_test ./virtual_address_range
+ echo $prev_policy > /proc/sys/vm/overcommit_memory
+ fi
# va high address boundary switch test
ARCH_ARM64="arm64"
@@ -456,6 +460,12 @@ CATEGORY="mkdirty" run_test ./mkdirty
CATEGORY="mdwe" run_test ./mdwe_test
+CATEGORY="page_frag" run_test ./test_page_frag.sh smoke
+
+CATEGORY="page_frag" run_test ./test_page_frag.sh aligned
+
+CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned
+
echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix
echo "1..${count_total}" | tap_output
diff --git a/tools/testing/selftests/mm/test_page_frag.sh b/tools/testing/selftests/mm/test_page_frag.sh
new file mode 100755
index 000000000000..f55b105084cf
--- /dev/null
+++ b/tools/testing/selftests/mm/test_page_frag.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2024 Yunsheng Lin <[email protected]>
+# Copyright (C) 2018 Uladzislau Rezki (Sony) <[email protected]>
+#
+# This is a test script for the kernel test driver to test the
+# correctness and performance of page_frag's implementation.
+# Therefore it is just a kernel module loader. You can specify
+# and pass different parameters in order to:
+# a) analyse performance of page fragment allocations;
+# b) stressing and stability check of page_frag subsystem.
+
+DRIVER="./page_frag/page_frag_test.ko"
+CPU_LIST=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
+TEST_CPU_0=$(echo $CPU_LIST | awk '{print $1}')
+
+if [ $(echo $CPU_LIST | wc -w) -gt 1 ]; then
+ TEST_CPU_1=$(echo $CPU_LIST | awk '{print $2}')
+ NR_TEST=100000000
+else
+ TEST_CPU_1=$TEST_CPU_0
+ NR_TEST=1000000
+fi
+
+# 1 if fails
+exitcode=1
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_test_failed_prefix() {
+ if dmesg | grep -q 'page_frag_test failed:';then
+ echo "page_frag_test failed, please check dmesg"
+ exit $exitcode
+ fi
+}
+
+#
+# Static templates for testing of page_frag APIs.
+# Also it is possible to pass any supported parameters manually.
+#
+SMOKE_PARAM="test_push_cpu=$TEST_CPU_0 test_pop_cpu=$TEST_CPU_1"
+NONALIGNED_PARAM="$SMOKE_PARAM test_alloc_len=75 nr_test=$NR_TEST"
+ALIGNED_PARAM="$NONALIGNED_PARAM test_align=1"
+
+check_test_requirements()
+{
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo "$0: Must be run as root"
+ exit $ksft_skip
+ fi
+
+ if ! which insmod > /dev/null 2>&1; then
+ echo "$0: You need insmod installed"
+ exit $ksft_skip
+ fi
+
+ if [ ! -f $DRIVER ]; then
+ echo "$0: You need to compile page_frag_test module"
+ exit $ksft_skip
+ fi
+}
+
+run_nonaligned_check()
+{
+ echo "Run performance tests to evaluate how fast nonaligned alloc API is."
+
+ insmod $DRIVER $NONALIGNED_PARAM > /dev/null 2>&1
+}
+
+run_aligned_check()
+{
+ echo "Run performance tests to evaluate how fast aligned alloc API is."
+
+ insmod $DRIVER $ALIGNED_PARAM > /dev/null 2>&1
+}
+
+run_smoke_check()
+{
+ echo "Run smoke test."
+
+ insmod $DRIVER $SMOKE_PARAM > /dev/null 2>&1
+}
+
+usage()
+{
+ echo -n "Usage: $0 [ aligned ] | [ nonaligned ] | | [ smoke ] | "
+ echo "manual parameters"
+ echo
+ echo "Valid tests and parameters:"
+ echo
+ modinfo $DRIVER
+ echo
+ echo "Example usage:"
+ echo
+ echo "# Shows help message"
+ echo "$0"
+ echo
+ echo "# Smoke testing"
+ echo "$0 smoke"
+ echo
+ echo "# Performance testing for nonaligned alloc API"
+ echo "$0 nonaligned"
+ echo
+ echo "# Performance testing for aligned alloc API"
+ echo "$0 aligned"
+ echo
+ exit 0
+}
+
+function validate_passed_args()
+{
+ VALID_ARGS=`modinfo $DRIVER | awk '/parm:/ {print $2}' | sed 's/:.*//'`
+
+ #
+ # Something has been passed, check it.
+ #
+ for passed_arg in $@; do
+ key=${passed_arg//=*/}
+ valid=0
+
+ for valid_arg in $VALID_ARGS; do
+ if [[ $key = $valid_arg ]]; then
+ valid=1
+ break
+ fi
+ done
+
+ if [[ $valid -ne 1 ]]; then
+ echo "Error: key is not correct: ${key}"
+ exit $exitcode
+ fi
+ done
+}
+
+function run_manual_check()
+{
+ #
+ # Validate passed parameters. If there is wrong one,
+ # the script exists and does not execute further.
+ #
+ validate_passed_args $@
+
+ echo "Run the test with following parameters: $@"
+ insmod $DRIVER $@ > /dev/null 2>&1
+}
+
+function run_test()
+{
+ if [ $# -eq 0 ]; then
+ usage
+ else
+ if [[ "$1" = "smoke" ]]; then
+ run_smoke_check
+ elif [[ "$1" = "nonaligned" ]]; then
+ run_nonaligned_check
+ elif [[ "$1" = "aligned" ]]; then
+ run_aligned_check
+ else
+ run_manual_check $@
+ fi
+ fi
+
+ check_test_failed_prefix
+
+ echo "Done."
+ echo "Check the kernel ring buffer to see the summary."
+}
+
+check_test_requirements
+run_test $@
+
+exit 0
diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
index 4e4c1e311247..2a2b69e91950 100644
--- a/tools/testing/selftests/mm/virtual_address_range.c
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -64,7 +64,7 @@
#define NR_CHUNKS_HIGH NR_CHUNKS_384TB
#endif
-static char *hind_addr(void)
+static char *hint_addr(void)
{
int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT);
@@ -185,7 +185,7 @@ int main(int argc, char *argv[])
}
for (i = 0; i < NR_CHUNKS_HIGH; i++) {
- hint = hind_addr();
+ hint = hint_addr();
hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);