117 files changed, 1688 insertions, 730 deletions
diff --git a/Documentation/admin-guide/perf/imx-ddr.rst b/Documentation/admin-guide/perf/imx-ddr.rst
new file mode 100644
index 000000000000..517a205abad6
--- /dev/null
+++ b/Documentation/admin-guide/perf/imx-ddr.rst
@@ -0,0 +1,52 @@
+=====================================================
+Freescale i.MX8 DDR Performance Monitoring Unit (PMU)
+=====================================================
+
+There are no performance counters inside the DRAM controller, so performance
+signals are brought out to the edge of the controller where a set of 4 x 32-bit
+counters is implemented. This is controlled by the CSV modes programmed in the
+counter control register, which causes a large number of PERF signals to be generated.
+
+Selection of the value for each counter is done via the config registers. There
+is one register for each counter. Counter 0 is special in that it always counts
+“time”; when it expires, it locks itself and the other counters, and an
+interrupt is raised. If any other counter overflows, it continues counting, and
+no interrupt is raised.
+
+The "format" directory describes the format of the config (event ID) and config1
+(AXI filtering) fields of the perf_event_attr structure, see /sys/bus/event_source/
+devices/imx8_ddr0/format/. The "events" directory describes the event types
+supported by the hardware that can be used with the perf tool, see /sys/bus/event_source/
+devices/imx8_ddr0/events/.
+ e.g.::
+        perf stat -a -e imx8_ddr0/cycles/ cmd
+        perf stat -a -e imx8_ddr0/read/,imx8_ddr0/write/ cmd
+
+AXI filtering is only used by CSV modes 0x41 (axid-read) and 0x42 (axid-write)
+to count read or write accesses that match the filter setting. The filter setting
+varies between DRAM controller implementations, which is distinguished by quirks
+in the driver.
+
+* With the DDR_CAP_AXI_ID_FILTER quirk.
+  The filter is defined with two configuration parts:
+  --AXI_ID defines the AxID matching value.
+  --AXI_MASKING defines which bits of AxID are meaningful for the matching.
+      0: corresponding bit is masked.
+      1: corresponding bit is not masked, i.e. used to do the matching.
+
+  AXI_ID and AXI_MASKING are mapped onto the DPCR1 register in the performance counter.
+  When the non-masked bits match the corresponding AXI_ID bits, the counter is
+  incremented, i.e. the perf counter is incremented if
+          (AxID & AXI_MASKING) == (AXI_ID & AXI_MASKING)
+
+  This filter doesn't support filtering different AXI IDs for the axid-read and axid-write
+  events at the same time, as the filter is shared between counters.
+  e.g.::
+        perf stat -a -e imx8_ddr0/axid-read,axi_mask=0xMMMM,axi_id=0xDDDD/ cmd
+        perf stat -a -e imx8_ddr0/axid-write,axi_mask=0xMMMM,axi_id=0xDDDD/ cmd
+
+  NOTE: axi_mask is inverted in userspace (i.e. set bits are the bits to mask), and
+  it is reverted in the driver automatically, so that the user can just specify
+  axi_id to monitor a specific ID, rather than having to specify axi_mask.
+  e.g.::
+        perf stat -a -e imx8_ddr0/axid-read,axi_id=0x12/ cmd, which will monitor ARID=0x12
diff --git a/Documentation/arm64/index.rst b/Documentation/arm64/index.rst
index 96b696ba4e6c..5c0c69dc58aa 100644
--- a/Documentation/arm64/index.rst
+++ b/Documentation/arm64/index.rst
@@ -16,6 +16,7 @@ ARM64 Architecture
    pointer-authentication
    silicon-errata
    sve
+   tagged-address-abi
    tagged-pointers
 
 ..
only:: subproject and html diff --git a/Documentation/arm64/kasan-offsets.sh b/Documentation/arm64/kasan-offsets.sh new file mode 100644 index 000000000000..2b7a021db363 --- /dev/null +++ b/Documentation/arm64/kasan-offsets.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +# Print out the KASAN_SHADOW_OFFSETS required to place the KASAN SHADOW +# start address at the mid-point of the kernel VA space + +print_kasan_offset () { + printf "%02d\t" $1 + printf "0x%08x00000000\n" $(( (0xffffffff & (-1 << ($1 - 1 - 32))) \ + + (1 << ($1 - 32 - $2)) \ + - (1 << (64 - 32 - $2)) )) +} + +echo KASAN_SHADOW_SCALE_SHIFT = 3 +printf "VABITS\tKASAN_SHADOW_OFFSET\n" +print_kasan_offset 48 3 +print_kasan_offset 47 3 +print_kasan_offset 42 3 +print_kasan_offset 39 3 +print_kasan_offset 36 3 +echo +echo KASAN_SHADOW_SCALE_SHIFT = 4 +printf "VABITS\tKASAN_SHADOW_OFFSET\n" +print_kasan_offset 48 4 +print_kasan_offset 47 4 +print_kasan_offset 42 4 +print_kasan_offset 39 4 +print_kasan_offset 36 4 diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst index 464b880fc4b7..b040909e45f8 100644 --- a/Documentation/arm64/memory.rst +++ b/Documentation/arm64/memory.rst @@ -14,6 +14,10 @@ with the 4KB page configuration, allowing 39-bit (512GB) or 48-bit 64KB pages, only 2 levels of translation tables, allowing 42-bit (4TB) virtual address, are used but the memory layout is the same. +ARMv8.2 adds optional support for Large Virtual Address space. This is +only available when running with a 64KB page size and expands the +number of descriptors in the first level of translation. + User addresses have bits 63:48 set to 0 while the kernel addresses have the same bits set to 1. TTBRx selection is given by bit 63 of the virtual address. The swapper_pg_dir contains only kernel (global) @@ -22,40 +26,43 @@ The swapper_pg_dir address is written to TTBR1 and never written to TTBR0. 
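As a cross-check of the kasan-offsets.sh helper above, here is a stand-alone C sketch (an editorial illustration, not part of the patch) of the same arithmetic: the shadow start is placed at the mid-point of the kernel VA space, and KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)). For 48-bit VAs it prints 0xdfffa00000000000 (generic KASAN) and 0xefff900000000000 (KASAN_SW_TAGS), matching the CONFIG_KASAN_SHADOW_OFFSET defaults added to arch/arm64/Kconfig below.

.. code-block:: c

  /* Editorial cross-check of kasan-offsets.sh; builds with any C compiler. */
  #include <stdio.h>

  static unsigned long long kasan_offset(int va_bits, int scale_shift)
  {
          /* Shadow start sits at the mid-point of the kernel VA space... */
          unsigned long long shadow_start = -(1ULL << (va_bits - 1));
          /* ...and the shadow covers 1/(2^scale_shift) of the 2^va_bits space. */
          unsigned long long shadow_end = shadow_start + (1ULL << (va_bits - scale_shift));

          /* KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1 << (64 - scale_shift)) */
          return shadow_end - (1ULL << (64 - scale_shift));
  }

  int main(void)
  {
          printf("48-bit, generic KASAN: 0x%016llx\n", kasan_offset(48, 3));
          printf("48-bit, KASAN_SW_TAGS: 0x%016llx\n", kasan_offset(48, 4));
          return 0;
  }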
-AArch64 Linux memory layout with 4KB pages + 3 levels:: - - Start End Size Use - ----------------------------------------------------------------------- - 0000000000000000 0000007fffffffff 512GB user - ffffff8000000000 ffffffffffffffff 512GB kernel - - -AArch64 Linux memory layout with 4KB pages + 4 levels:: +AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit):: Start End Size Use ----------------------------------------------------------------------- 0000000000000000 0000ffffffffffff 256TB user - ffff000000000000 ffffffffffffffff 256TB kernel - - -AArch64 Linux memory layout with 64KB pages + 2 levels:: + ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map + ffff800000000000 ffff9fffffffffff 32TB kasan shadow region + ffffa00000000000 ffffa00007ffffff 128MB bpf jit region + ffffa00008000000 ffffa0000fffffff 128MB modules + ffffa00010000000 fffffdffbffeffff ~93TB vmalloc + fffffdffbfff0000 fffffdfffe5f8fff ~998MB [guard region] + fffffdfffe5f9000 fffffdfffe9fffff 4124KB fixed mappings + fffffdfffea00000 fffffdfffebfffff 2MB [guard region] + fffffdfffec00000 fffffdffffbfffff 16MB PCI I/O space + fffffdffffc00000 fffffdffffdfffff 2MB [guard region] + fffffdffffe00000 ffffffffffdfffff 2TB vmemmap + ffffffffffe00000 ffffffffffffffff 2MB [guard region] + + +AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):: Start End Size Use ----------------------------------------------------------------------- - 0000000000000000 000003ffffffffff 4TB user - fffffc0000000000 ffffffffffffffff 4TB kernel - - -AArch64 Linux memory layout with 64KB pages + 3 levels:: - - Start End Size Use - ----------------------------------------------------------------------- - 0000000000000000 0000ffffffffffff 256TB user - ffff000000000000 ffffffffffffffff 256TB kernel - - -For details of the virtual kernel memory layout please see the kernel -booting log. + 0000000000000000 000fffffffffffff 4PB user + fff0000000000000 fff7ffffffffffff 2PB kernel logical memory map + fff8000000000000 fffd9fffffffffff 1440TB [gap] + fffda00000000000 ffff9fffffffffff 512TB kasan shadow region + ffffa00000000000 ffffa00007ffffff 128MB bpf jit region + ffffa00008000000 ffffa0000fffffff 128MB modules + ffffa00010000000 fffff81ffffeffff ~88TB vmalloc + fffff81fffff0000 fffffc1ffe58ffff ~3TB [guard region] + fffffc1ffe590000 fffffc1ffe9fffff 4544KB fixed mappings + fffffc1ffea00000 fffffc1ffebfffff 2MB [guard region] + fffffc1ffec00000 fffffc1fffbfffff 16MB PCI I/O space + fffffc1fffc00000 fffffc1fffdfffff 2MB [guard region] + fffffc1fffe00000 ffffffffffdfffff 3968GB vmemmap + ffffffffffe00000 ffffffffffffffff 2MB [guard region] Translation table lookup with 4KB pages:: @@ -83,7 +90,8 @@ Translation table lookup with 64KB pages:: | | | | [15:0] in-page offset | | | +----------> [28:16] L3 index | | +--------------------------> [41:29] L2 index - | +-------------------------------> [47:42] L1 index + | +-------------------------------> [47:42] L1 index (48-bit) + | [51:42] L1 index (52-bit) +-------------------------------------------------> [63] TTBR0/1 @@ -96,3 +104,62 @@ ARM64_HARDEN_EL2_VECTORS is selected for particular CPUs. When using KVM with the Virtualization Host Extensions, no additional mappings are created, since the host kernel runs directly in EL2. 
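To make the 64KB-granule lookup diagram above concrete, here is a small editorial sketch (not kernel code) that peels the table indices off a virtual address using the bit positions shown; with 52-bit VAs the only change is that the L1 index widens from bits [47:42] to [51:42].

.. code-block:: c

  /* Decode a VA for the 64KB page, 3-level layout shown in the diagram. */
  #include <stdint.h>
  #include <stdio.h>

  static void decode_va_64k(uint64_t va, int va_bits)
  {
          uint64_t off = va & 0xffff;                    /* [15:0]  in-page offset */
          uint64_t l3  = (va >> 16) & 0x1fff;            /* [28:16] L3 index */
          uint64_t l2  = (va >> 29) & 0x1fff;            /* [41:29] L2 index */
          /* [47:42] for 48-bit VAs, [51:42] for 52-bit VAs */
          uint64_t l1  = (va >> 42) & ((1ULL << (va_bits - 42)) - 1);
          unsigned ttbr = va >> 63;                      /* [63] selects TTBR0/TTBR1 */

          printf("TTBR%u L1=%llu L2=%llu L3=%llu offset=0x%llx\n", ttbr,
                 (unsigned long long)l1, (unsigned long long)l2,
                 (unsigned long long)l3, (unsigned long long)off);
  }

  int main(void)
  {
          decode_va_64k(0xffffa00008000000ULL, 48);      /* start of the modules area above */
          return 0;
  }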
+ +52-bit VA support in the kernel +------------------------------- +If the ARMv8.2-LVA optional feature is present, and we are running +with a 64KB page size; then it is possible to use 52-bits of address +space for both userspace and kernel addresses. However, any kernel +binary that supports 52-bit must also be able to fall back to 48-bit +at early boot time if the hardware feature is not present. + +This fallback mechanism necessitates the kernel .text to be in the +higher addresses such that they are invariant to 48/52-bit VAs. Due +to the kasan shadow being a fraction of the entire kernel VA space, +the end of the kasan shadow must also be in the higher half of the +kernel VA space for both 48/52-bit. (Switching from 48-bit to 52-bit, +the end of the kasan shadow is invariant and dependent on ~0UL, +whilst the start address will "grow" towards the lower addresses). + +In order to optimise phys_to_virt and virt_to_phys, the PAGE_OFFSET +is kept constant at 0xFFF0000000000000 (corresponding to 52-bit), +this obviates the need for an extra variable read. The physvirt +offset and vmemmap offsets are computed at early boot to enable +this logic. + +As a single binary will need to support both 48-bit and 52-bit VA +spaces, the VMEMMAP must be sized large enough for 52-bit VAs and +also must be sized large enought to accommodate a fixed PAGE_OFFSET. + +Most code in the kernel should not need to consider the VA_BITS, for +code that does need to know the VA size the variables are +defined as follows: + +VA_BITS constant the *maximum* VA space size + +VA_BITS_MIN constant the *minimum* VA space size + +vabits_actual variable the *actual* VA space size + + +Maximum and minimum sizes can be useful to ensure that buffers are +sized large enough or that addresses are positioned close enough for +the "worst" case. + +52-bit userspace VAs +-------------------- +To maintain compatibility with software that relies on the ARMv8.0 +VA space maximum size of 48-bits, the kernel will, by default, +return virtual addresses to userspace from a 48-bit range. + +Software can "opt-in" to receiving VAs from a 52-bit space by +specifying an mmap hint parameter that is larger than 48-bit. +For example: + maybe_high_address = mmap(~0UL, size, prot, flags,...); + +It is also possible to build a debug kernel that returns addresses +from a 52-bit space by enabling the following kernel config options: + CONFIG_EXPERT=y && CONFIG_ARM64_FORCE_52BIT=y + +Note that this option is only intended for debugging applications +and should not be used in production. diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst new file mode 100644 index 000000000000..d4a85d535bf9 --- /dev/null +++ b/Documentation/arm64/tagged-address-abi.rst @@ -0,0 +1,156 @@ +========================== +AArch64 TAGGED ADDRESS ABI +========================== + +Authors: Vincenzo Frascino <vincenzo.frascino@arm.com> + Catalin Marinas <catalin.marinas@arm.com> + +Date: 21 August 2019 + +This document describes the usage and semantics of the Tagged Address +ABI on AArch64 Linux. + +1. Introduction +--------------- + +On AArch64 the ``TCR_EL1.TBI0`` bit is set by default, allowing +userspace (EL0) to perform memory accesses through 64-bit pointers with +a non-zero top byte. This document describes the relaxation of the +syscall ABI that allows userspace to pass certain tagged pointers to +kernel syscalls. + +2. 
AArch64 Tagged Address ABI +----------------------------- + +From the kernel syscall interface perspective and for the purposes of +this document, a "valid tagged pointer" is a pointer with a potentially +non-zero top-byte that references an address in the user process address +space obtained in one of the following ways: + +- ``mmap()`` syscall where either: + + - flags have the ``MAP_ANONYMOUS`` bit set or + - the file descriptor refers to a regular file (including those + returned by ``memfd_create()``) or ``/dev/zero`` + +- ``brk()`` syscall (i.e. the heap area between the initial location of + the program break at process creation and its current location). + +- any memory mapped by the kernel in the address space of the process + during creation and with the same restrictions as for ``mmap()`` above + (e.g. data, bss, stack). + +The AArch64 Tagged Address ABI has two stages of relaxation depending +how the user addresses are used by the kernel: + +1. User addresses not accessed by the kernel but used for address space + management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use + of valid tagged pointers in this context is always allowed. + +2. User addresses accessed by the kernel (e.g. ``write()``). This ABI + relaxation is disabled by default and the application thread needs to + explicitly enable it via ``prctl()`` as follows: + + - ``PR_SET_TAGGED_ADDR_CTRL``: enable or disable the AArch64 Tagged + Address ABI for the calling thread. + + The ``(unsigned int) arg2`` argument is a bit mask describing the + control mode used: + + - ``PR_TAGGED_ADDR_ENABLE``: enable AArch64 Tagged Address ABI. + Default status is disabled. + + Arguments ``arg3``, ``arg4``, and ``arg5`` must be 0. + + - ``PR_GET_TAGGED_ADDR_CTRL``: get the status of the AArch64 Tagged + Address ABI for the calling thread. + + Arguments ``arg2``, ``arg3``, ``arg4``, and ``arg5`` must be 0. + + The ABI properties described above are thread-scoped, inherited on + clone() and fork() and cleared on exec(). + + Calling ``prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0)`` + returns ``-EINVAL`` if the AArch64 Tagged Address ABI is globally + disabled by ``sysctl abi.tagged_addr_disabled=1``. The default + ``sysctl abi.tagged_addr_disabled`` configuration is 0. + +When the AArch64 Tagged Address ABI is enabled for a thread, the +following behaviours are guaranteed: + +- All syscalls except the cases mentioned in section 3 can accept any + valid tagged pointer. + +- The syscall behaviour is undefined for invalid tagged pointers: it may + result in an error code being returned, a (fatal) signal being raised, + or other modes of failure. + +- The syscall behaviour for a valid tagged pointer is the same as for + the corresponding untagged pointer. + + +A definition of the meaning of tagged pointers on AArch64 can be found +in Documentation/arm64/tagged-pointers.rst. + +3. AArch64 Tagged Address ABI Exceptions +----------------------------------------- + +The following system call parameters must be untagged regardless of the +ABI relaxation: + +- ``prctl()`` other than pointers to user data either passed directly or + indirectly as arguments to be accessed by the kernel. + +- ``ioctl()`` other than pointers to user data either passed directly or + indirectly as arguments to be accessed by the kernel. + +- ``shmat()`` and ``shmdt()``. + +Any attempt to use non-zero tagged pointers may result in an error code +being returned, a (fatal) signal being raised, or other modes of +failure. + +4. 
Example of correct usage +--------------------------- +.. code-block:: c + + #include <stdlib.h> + #include <string.h> + #include <unistd.h> + #include <sys/mman.h> + #include <sys/prctl.h> + + #define PR_SET_TAGGED_ADDR_CTRL 55 + #define PR_TAGGED_ADDR_ENABLE (1UL << 0) + + #define TAG_SHIFT 56 + + int main(void) + { + int tbi_enabled = 0; + unsigned long tag = 0; + char *ptr; + + /* check/enable the tagged address ABI */ + if (!prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0)) + tbi_enabled = 1; + + /* memory allocation */ + ptr = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) + return 1; + + /* set a non-zero tag if the ABI is available */ + if (tbi_enabled) + tag = rand() & 0xff; + ptr = (char *)((unsigned long)ptr | (tag << TAG_SHIFT)); + + /* memory access to a tagged address */ + strcpy(ptr, "tagged pointer\n"); + + /* syscall with a tagged pointer */ + write(1, ptr, strlen(ptr)); + + return 0; + } diff --git a/Documentation/arm64/tagged-pointers.rst b/Documentation/arm64/tagged-pointers.rst index 2acdec3ebbeb..eab4323609b9 100644 --- a/Documentation/arm64/tagged-pointers.rst +++ b/Documentation/arm64/tagged-pointers.rst @@ -20,7 +20,9 @@ Passing tagged addresses to the kernel -------------------------------------- All interpretation of userspace memory addresses by the kernel assumes -an address tag of 0x00. +an address tag of 0x00, unless the application enables the AArch64 +Tagged Address ABI explicitly +(Documentation/arm64/tagged-address-abi.rst). This includes, but is not limited to, addresses found in: @@ -33,13 +35,15 @@ This includes, but is not limited to, addresses found in: - the frame pointer (x29) and frame records, e.g. when interpreting them to generate a backtrace or call graph. -Using non-zero address tags in any of these locations may result in an -error code being returned, a (fatal) signal being raised, or other modes -of failure. +Using non-zero address tags in any of these locations when the +userspace application did not enable the AArch64 Tagged Address ABI may +result in an error code being returned, a (fatal) signal being raised, +or other modes of failure. -For these reasons, passing non-zero address tags to the kernel via -system calls is forbidden, and using a non-zero address tag for sp is -strongly discouraged. +For these reasons, when the AArch64 Tagged Address ABI is disabled, +passing non-zero address tags to the kernel via system calls is +forbidden, and using a non-zero address tag for sp is strongly +discouraged. Programs maintaining a frame pointer and frame records that use non-zero address tags may suffer impaired or inaccurate debug and profiling @@ -59,6 +63,9 @@ be preserved. The architecture prevents the use of a tagged PC, so the upper byte will be set to a sign-extension of bit 55 on exception return. +This behaviour is maintained when the AArch64 Tagged Address ABI is +enabled. + Other considerations -------------------- diff --git a/MAINTAINERS b/MAINTAINERS index e4a286fb13ba..f29cfc59d51c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4290,6 +4290,14 @@ S: Supported F: drivers/cpuidle/cpuidle-exynos.c F: arch/arm/mach-exynos/pm.c +CPUIDLE DRIVER - ARM PSCI +M: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> +M: Sudeep Holla <sudeep.holla@arm.com> +L: linux-pm@vger.kernel.org +L: linux-arm-kernel@lists.infradead.org +S: Supported +F: drivers/cpuidle/cpuidle-psci.c + CPU IDLE TIME MANAGEMENT FRAMEWORK M: "Rafael J. 
Wysocki" <rjw@rjwysocki.net> M: Daniel Lezcano <daniel.lezcano@linaro.org> @@ -6439,6 +6447,7 @@ M: Frank Li <Frank.li@nxp.com> L: linux-arm-kernel@lists.infradead.org S: Maintained F: drivers/perf/fsl_imx8_ddr_perf.c +F: Documentation/admin-guide/perf/imx-ddr.rst F: Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt FREESCALE IMX LPI2C DRIVER @@ -912,6 +912,10 @@ ifeq ($(CONFIG_STRIP_ASM_SYMS),y) LDFLAGS_vmlinux += $(call ld-option, -X,) endif +ifeq ($(CONFIG_RELR),y) +LDFLAGS_vmlinux += --pack-dyn-relocs=relr +endif + # insure the checker run with the right endianness CHECKFLAGS += $(if $(CONFIG_CPU_BIG_ENDIAN),-mbig-endian,-mlittle-endian) diff --git a/arch/Kconfig b/arch/Kconfig index a7b57dd42c26..aa6bdb3df5c1 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -925,6 +925,20 @@ config LOCK_EVENT_COUNTS the chance of application behavior change because of timing differences. The counts are reported via debugfs. +# Select if the architecture has support for applying RELR relocations. +config ARCH_HAS_RELR + bool + +config RELR + bool "Use RELR relocation packing" + depends on ARCH_HAS_RELR && TOOLS_SUPPORT_RELR + default y + help + Store the kernel's dynamic relocations in the RELR relocation packing + format. Requires a compatible linker (LLD supports this feature), as + well as compatible NM and OBJCOPY utilities (llvm-nm and llvm-objcopy + are compatible). + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild new file mode 100644 index 000000000000..d6465823b281 --- /dev/null +++ b/arch/arm64/Kbuild @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += kernel/ mm/ +obj-$(CONFIG_NET) += net/ +obj-$(CONFIG_KVM) += kvm/ +obj-$(CONFIG_XEN) += xen/ +obj-$(CONFIG_CRYPTO) += crypto/ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3adcec05b1f6..6481964b6425 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -148,6 +148,7 @@ config ARM64 select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS @@ -286,7 +287,7 @@ config PGTABLE_LEVELS int default 2 if ARM64_16K_PAGES && ARM64_VA_BITS_36 default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 - default 3 if ARM64_64K_PAGES && (ARM64_VA_BITS_48 || ARM64_USER_VA_BITS_52) + default 3 if ARM64_64K_PAGES && (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 default 3 if ARM64_16K_PAGES && ARM64_VA_BITS_47 default 4 if !ARM64_64K_PAGES && ARM64_VA_BITS_48 @@ -297,6 +298,21 @@ config ARCH_SUPPORTS_UPROBES config ARCH_PROC_KCORE_TEXT def_bool y +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0xdfffa00000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && !KASAN_SW_TAGS + default 0xdfffd00000000000 if ARM64_VA_BITS_47 && !KASAN_SW_TAGS + default 0xdffffe8000000000 if ARM64_VA_BITS_42 && !KASAN_SW_TAGS + default 0xdfffffd000000000 if ARM64_VA_BITS_39 && !KASAN_SW_TAGS + default 0xdffffffa00000000 if ARM64_VA_BITS_36 && !KASAN_SW_TAGS + default 0xefff900000000000 if (ARM64_VA_BITS_48 || ARM64_VA_BITS_52) && KASAN_SW_TAGS + default 0xefffc80000000000 if ARM64_VA_BITS_47 && KASAN_SW_TAGS + default 0xeffffe4000000000 if ARM64_VA_BITS_42 && KASAN_SW_TAGS + default 0xefffffc800000000 if ARM64_VA_BITS_39 && KASAN_SW_TAGS + default 0xeffffff900000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS + default 0xffffffffffffffff + source "arch/arm64/Kconfig.platforms" menu 
"Kernel Features" @@ -744,13 +760,14 @@ config ARM64_VA_BITS_47 config ARM64_VA_BITS_48 bool "48-bit" -config ARM64_USER_VA_BITS_52 - bool "52-bit (user)" +config ARM64_VA_BITS_52 + bool "52-bit" depends on ARM64_64K_PAGES && (ARM64_PAN || !ARM64_SW_TTBR0_PAN) help Enable 52-bit virtual addressing for userspace when explicitly - requested via a hint to mmap(). The kernel will continue to - use 48-bit virtual addresses for its own mappings. + requested via a hint to mmap(). The kernel will also use 52-bit + virtual addresses for its own mappings (provided HW support for + this feature is available, otherwise it reverts to 48-bit). NOTE: Enabling 52-bit virtual addressing in conjunction with ARMv8.3 Pointer Authentication will result in the PAC being @@ -763,7 +780,7 @@ endchoice config ARM64_FORCE_52BIT bool "Force 52-bit virtual addresses for userspace" - depends on ARM64_USER_VA_BITS_52 && EXPERT + depends on ARM64_VA_BITS_52 && EXPERT help For systems with 52-bit userspace VAs enabled, the kernel will attempt to maintain compatibility with older software by providing 48-bit VAs @@ -780,7 +797,8 @@ config ARM64_VA_BITS default 39 if ARM64_VA_BITS_39 default 42 if ARM64_VA_BITS_42 default 47 if ARM64_VA_BITS_47 - default 48 if ARM64_VA_BITS_48 || ARM64_USER_VA_BITS_52 + default 48 if ARM64_VA_BITS_48 + default 52 if ARM64_VA_BITS_52 choice prompt "Physical address space size" @@ -1110,6 +1128,15 @@ config ARM64_SW_TTBR0_PAN zeroed area and reserved ASID. The user access routines restore the valid TTBR0_EL1 temporarily. +config ARM64_TAGGED_ADDR_ABI + bool "Enable the tagged user addresses syscall ABI" + default y + help + When this option is enabled, user applications can opt in to a + relaxed ABI via prctl() allowing tagged addresses to be passed + to system calls as pointer arguments. For details, see + Documentation/arm64/tagged-address-abi.txt. 
+ menuconfig COMPAT bool "Kernel support for 32-bit EL0" depends on ARM64_4K_PAGES || EXPERT @@ -1467,6 +1494,7 @@ endif config RELOCATABLE bool + select ARCH_HAS_RELR help This builds the kernel as a Position Independent Executable (PIE), which retains all relocation metadata required to relocate the diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 61de992bbea3..2847b36f72ed 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -126,21 +126,9 @@ KBUILD_CFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_CPPFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_AFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) -# KASAN_SHADOW_OFFSET = VA_START + (1 << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT)) -# - (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) -# in 32-bit arithmetic -KASAN_SHADOW_OFFSET := $(shell printf "0x%08x00000000\n" $$(( \ - (0xffffffff & (-1 << ($(CONFIG_ARM64_VA_BITS) - 32))) \ - + (1 << ($(CONFIG_ARM64_VA_BITS) - 32 - $(KASAN_SHADOW_SCALE_SHIFT))) \ - - (1 << (64 - 32 - $(KASAN_SHADOW_SCALE_SHIFT))) )) ) - export TEXT_OFFSET GZFLAGS -core-y += arch/arm64/kernel/ arch/arm64/mm/ -core-$(CONFIG_NET) += arch/arm64/net/ -core-$(CONFIG_KVM) += arch/arm64/kvm/ -core-$(CONFIG_XEN) += arch/arm64/xen/ -core-$(CONFIG_CRYPTO) += arch/arm64/crypto/ +core-y += arch/arm64/ libs-y := arch/arm64/lib/ $(libs-y) core-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index e3a15c751b13..b8cf7c85ffa2 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -124,17 +124,6 @@ alternative_endif .endm /* - * Sanitise a 64-bit bounded index wrt speculation, returning zero if out - * of bounds. - */ - .macro mask_nospec64, idx, limit, tmp - sub \tmp, \idx, \limit - bic \tmp, \tmp, \idx - and \idx, \idx, \tmp, asr #63 - csdb - .endm - -/* * NOP sequence */ .macro nops, num @@ -350,6 +339,13 @@ alternative_endif .endm /* + * tcr_set_t1sz - update TCR.T1SZ + */ + .macro tcr_set_t1sz, valreg, t1sz + bfi \valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH + .endm + +/* * tcr_compute_pa_size - set TCR.(I)PS to the highest supported * ID_AA64MMFR0_EL1.PARange value * @@ -538,9 +534,13 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU * In future this may be nop'ed out when dealing with 52-bit kernel VAs. * ttbr: Value of ttbr to set, modified. */ - .macro offset_ttbr1, ttbr -#ifdef CONFIG_ARM64_USER_VA_BITS_52 + .macro offset_ttbr1, ttbr, tmp +#ifdef CONFIG_ARM64_VA_BITS_52 + mrs_s \tmp, SYS_ID_AA64MMFR2_EL1 + and \tmp, \tmp, #(0xf << ID_AA64MMFR2_LVA_SHIFT) + cbnz \tmp, .Lskipoffs_\@ orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET +.Lskipoffs_\@ : #endif .endm @@ -550,7 +550,7 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU * to be nop'ed out when dealing with 52-bit kernel VAs. 
*/ .macro restore_ttbr1, ttbr -#ifdef CONFIG_ARM64_USER_VA_BITS_52 +#ifdef CONFIG_ARM64_VA_BITS_52 bic \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET #endif .endm diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 657b0457d83c..a5ca23950cfd 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -15,8 +15,6 @@ #include <asm/barrier.h> #include <asm/lse.h> -#ifdef __KERNEL__ - #define __ARM64_IN_ATOMIC_IMPL #if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE) @@ -157,5 +155,4 @@ #include <asm-generic/atomic-instrumented.h> -#endif -#endif +#endif /* __ASM_ATOMIC_H */ diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 64eeaa41e7ca..43da6dd29592 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -78,7 +78,7 @@ static inline u32 cache_type_cwg(void) return (read_cpuid_cachetype() >> CTR_CWG_SHIFT) & CTR_CWG_MASK; } -#define __read_mostly __attribute__((__section__(".data..read_mostly"))) +#define __read_mostly __section(.data..read_mostly) static inline int cache_line_size_of_cpu(void) { diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index fb8ad4616b3b..b0d53a265f1d 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -4,7 +4,6 @@ */ #ifndef __ASM_COMPAT_H #define __ASM_COMPAT_H -#ifdef __KERNEL__ #ifdef CONFIG_COMPAT /* @@ -215,5 +214,4 @@ static inline int is_compat_thread(struct thread_info *thread) } #endif /* CONFIG_COMPAT */ -#endif /* __KERNEL__ */ #endif /* __ASM_COMPAT_H */ diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index c09d633c3109..86aabf1e0199 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -23,6 +23,8 @@ * @cpu_boot: Boots a cpu into the kernel. * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary * synchronisation. Called from the cpu being booted. + * @cpu_can_disable: Determines whether a CPU can be disabled based on + * mechanism-specific information. * @cpu_disable: Prepares a cpu to die. May fail for some mechanism-specific * reason, which will cause the hot unplug to be aborted. Called * from the cpu to be killed. @@ -42,6 +44,7 @@ struct cpu_operations { int (*cpu_boot)(unsigned int); void (*cpu_postboot)(void); #ifdef CONFIG_HOTPLUG_CPU + bool (*cpu_can_disable)(unsigned int cpu); int (*cpu_disable)(unsigned int cpu); void (*cpu_die)(unsigned int cpu); int (*cpu_kill)(unsigned int cpu); diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index c96ffa4722d3..9cde5d2e768f 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -289,9 +289,16 @@ struct arm64_cpu_capabilities { u16 type; bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope); /* - * Take the appropriate actions to enable this capability for this CPU. - * For each successfully booted CPU, this method is called for each - * globally detected capability. + * Take the appropriate actions to configure this capability + * for this CPU. If the capability is detected by the kernel + * this will be called on all the CPUs in the system, + * including the hotplugged CPUs, regardless of whether the + * capability is available on that specific CPU. This is + * useful for some capabilities (e.g, working around CPU + * errata), where all the CPUs must take some action (e.g, + * changing system control/configuration). 
Thus, if an action + * is required only if the CPU has the capability, then the + * routine must check it before taking any action. */ void (*cpu_enable)(const struct arm64_cpu_capabilities *cap); union { @@ -363,21 +370,6 @@ cpucap_multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, return false; } -/* - * Take appropriate action for all matching entries in the shared capability - * entry. - */ -static inline void -cpucap_multi_entry_cap_cpu_enable(const struct arm64_cpu_capabilities *entry) -{ - const struct arm64_cpu_capabilities *caps; - - for (caps = entry->match_list; caps->matches; caps++) - if (caps->matches(caps, SCOPE_LOCAL_CPU) && - caps->cpu_enable) - caps->cpu_enable(caps); -} - extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index e7d46631cc42..b1454d117cd2 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -51,14 +51,6 @@ #define MIDR_CPU_MODEL_MASK (MIDR_IMPLEMENTOR_MASK | MIDR_PARTNUM_MASK | \ MIDR_ARCHITECTURE_MASK) -#define MIDR_IS_CPU_MODEL_RANGE(midr, model, rv_min, rv_max) \ -({ \ - u32 _model = (midr) & MIDR_CPU_MODEL_MASK; \ - u32 rv = (midr) & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); \ - \ - _model == (model) && rv >= (rv_min) && rv <= (rv_max); \ - }) - #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_APM 0x50 #define ARM_CPU_IMP_CAVIUM 0x43 @@ -159,10 +151,19 @@ struct midr_range { #define MIDR_REV(m, v, r) MIDR_RANGE(m, v, r, v, r) #define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf) +static inline bool midr_is_cpu_model_range(u32 midr, u32 model, u32 rv_min, + u32 rv_max) +{ + u32 _model = midr & MIDR_CPU_MODEL_MASK; + u32 rv = midr & (MIDR_REVISION_MASK | MIDR_VARIANT_MASK); + + return _model == model && rv >= rv_min && rv <= rv_max; +} + static inline bool is_midr_in_range(u32 midr, struct midr_range const *range) { - return MIDR_IS_CPU_MODEL_RANGE(midr, range->model, - range->rv_min, range->rv_max); + return midr_is_cpu_model_range(midr, range->model, + range->rv_min, range->rv_max); } static inline bool diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index d8ec5bb881c2..7619f473155f 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -5,8 +5,6 @@ #ifndef __ASM_DEBUG_MONITORS_H #define __ASM_DEBUG_MONITORS_H -#ifdef __KERNEL__ - #include <linux/errno.h> #include <linux/types.h> #include <asm/brk-imm.h> @@ -128,5 +126,4 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) int aarch32_break_handler(struct pt_regs *regs); #endif /* __ASSEMBLY */ -#endif /* __KERNEL__ */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index bdcb0922a40c..fb3e5044f473 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -5,8 +5,6 @@ #ifndef __ASM_DMA_MAPPING_H #define __ASM_DMA_MAPPING_H -#ifdef __KERNEL__ - #include <linux/types.h> #include <linux/vmalloc.h> @@ -27,5 +25,4 @@ static inline bool is_device_dma_coherent(struct device *dev) return dev->dma_coherent; } -#endif /* __KERNEL__ */ #endif /* __ASM_DMA_MAPPING_H */ diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 76a144702586..b54d3a86c444 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h 
@@ -79,7 +79,7 @@ static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) /* * On arm64, we have to ensure that the initrd ends up in the linear region, - * which is a 1 GB aligned region of size '1UL << (VA_BITS - 1)' that is + * which is a 1 GB aligned region of size '1UL << (VA_BITS_MIN - 1)' that is * guaranteed to cover the kernel Image. * * Since the EFI stub is part of the kernel Image, we can relax the @@ -90,7 +90,7 @@ static inline unsigned long efi_get_max_fdt_addr(unsigned long dram_base) static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, unsigned long image_addr) { - return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS - 1)); + return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1)); } #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 65ac18400979..cb29253ae86b 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -34,7 +34,8 @@ #define ESR_ELx_EC_SMC64 (0x17) /* EL2 and above */ #define ESR_ELx_EC_SYS64 (0x18) #define ESR_ELx_EC_SVE (0x19) -/* Unallocated EC: 0x1A - 0x1E */ +#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */ +/* Unallocated EC: 0x1b - 0x1E */ #define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */ #define ESR_ELx_EC_IABT_LOW (0x20) #define ESR_ELx_EC_IABT_CUR (0x21) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index ed57b760f38c..a17393ff6677 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -30,4 +30,6 @@ static inline u32 disr_to_esr(u64 disr) return esr; } +asmlinkage void enter_from_user_mode(void); + #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index b6a2c352f4c3..59f10dd13f12 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -21,7 +21,7 @@ #include <linux/stddef.h> #include <linux/types.h> -#if defined(__KERNEL__) && defined(CONFIG_COMPAT) +#ifdef CONFIG_COMPAT /* Masks for extracting the FPSR and FPCR from the FPSCR */ #define VFP_FPSCR_STAT_MASK 0xf800009f #define VFP_FPSCR_CTRL_MASK 0x07f79f00 diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 6211e3105491..6cc26a127819 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -5,8 +5,6 @@ #ifndef __ASM_FUTEX_H #define __ASM_FUTEX_H -#ifdef __KERNEL__ - #include <linux/futex.h> #include <linux/uaccess.h> @@ -129,5 +127,4 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, return ret; } -#endif /* __KERNEL__ */ #endif /* __ASM_FUTEX_H */ diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h index db9ab760e6fd..bc7aaed4b34e 100644 --- a/arch/arm64/include/asm/hw_breakpoint.h +++ b/arch/arm64/include/asm/hw_breakpoint.h @@ -10,8 +10,6 @@ #include <asm/sysreg.h> #include <asm/virt.h> -#ifdef __KERNEL__ - struct arch_hw_breakpoint_ctrl { u32 __reserved : 19, len : 8, @@ -156,5 +154,4 @@ static inline int get_num_wrps(void) ID_AA64DFR0_WRPS_SHIFT); } -#endif /* __KERNEL__ */ #endif /* __ASM_BREAKPOINT_H */ diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 7ed92626949d..e9763831186a 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -8,8 +8,6 @@ #ifndef __ASM_IO_H #define __ASM_IO_H -#ifdef __KERNEL__ - #include <linux/types.h> #include <asm/byteorder.h> @@ -97,7 +95,7 @@ static inline u64 
__raw_readq(const volatile void __iomem *addr) ({ \ unsigned long tmp; \ \ - rmb(); \ + dma_rmb(); \ \ /* \ * Create a dummy control dependency from the IO read to any \ @@ -111,7 +109,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) }) #define __io_par(v) __iormb(v) -#define __iowmb() wmb() +#define __iowmb() dma_wmb() /* * Relaxed I/O memory access primitives. These follow the Device memory @@ -207,5 +205,4 @@ extern int valid_mmap_phys_addr_range(unsigned long pfn, size_t size); extern int devmem_is_allowed(unsigned long pfn); -#endif /* __KERNEL__ */ #endif /* __ASM_IO_H */ diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 7872f260c9ee..1a59f0ed1ae3 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -5,8 +5,6 @@ #ifndef __ASM_IRQFLAGS_H #define __ASM_IRQFLAGS_H -#ifdef __KERNEL__ - #include <asm/alternative.h> #include <asm/ptrace.h> #include <asm/sysreg.h> @@ -128,5 +126,4 @@ static inline void arch_local_irq_restore(unsigned long flags) : "memory"); } -#endif -#endif +#endif /* __ASM_IRQFLAGS_H */ diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h index b52aacd2c526..b0dc4abc3589 100644 --- a/arch/arm64/include/asm/kasan.h +++ b/arch/arm64/include/asm/kasan.h @@ -18,11 +18,8 @@ * KASAN_SHADOW_START: beginning of the kernel virtual addresses. * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses, * where N = (1 << KASAN_SHADOW_SCALE_SHIFT). - */ -#define KASAN_SHADOW_START (VA_START) -#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) - -/* + * + * KASAN_SHADOW_OFFSET: * This value is used to map an address to the corresponding shadow * address by the following formula: * shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET @@ -33,8 +30,8 @@ * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - * (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT)) */ -#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \ - (64 - KASAN_SHADOW_SCALE_SHIFT))) +#define _KASAN_SHADOW_START(va) (KASAN_SHADOW_END - (1UL << ((va) - KASAN_SHADOW_SCALE_SHIFT))) +#define KASAN_SHADOW_START _KASAN_SHADOW_START(vabits_actual) void kasan_init(void); void kasan_copy_shadow(pgd_t *pgdir); diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index fb04f10a78ab..b61b50bf68b1 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -12,10 +12,10 @@ #include <linux/compiler.h> #include <linux/const.h> +#include <linux/sizes.h> #include <linux/types.h> #include <asm/bug.h> #include <asm/page-def.h> -#include <linux/sizes.h> /* * Size of the PCI I/O space. This must remain a power of two so that @@ -26,37 +26,50 @@ /* * VMEMMAP_SIZE - allows the whole linear region to be covered by * a struct page array + * + * If we are configured with a 52-bit kernel VA then our VMEMMAP_SIZE + * needs to cover the memory region from the beginning of the 52-bit + * PAGE_OFFSET all the way to PAGE_END for 48-bit. This allows us to + * keep a constant PAGE_OFFSET and "fallback" to using the higher end + * of the VMEMMAP where 52-bit support is not available in hardware. 
*/ -#define VMEMMAP_SIZE (UL(1) << (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)) +#define VMEMMAP_SIZE ((_PAGE_END(VA_BITS_MIN) - PAGE_OFFSET) \ + >> (PAGE_SHIFT - STRUCT_PAGE_MAX_SHIFT)) /* - * PAGE_OFFSET - the virtual address of the start of the linear map (top - * (VA_BITS - 1)) - * KIMAGE_VADDR - the virtual address of the start of the kernel image + * PAGE_OFFSET - the virtual address of the start of the linear map, at the + * start of the TTBR1 address space. + * PAGE_END - the end of the linear map, where all other kernel mappings begin. + * KIMAGE_VADDR - the virtual address of the start of the kernel image. * VA_BITS - the maximum number of bits for virtual addresses. - * VA_START - the first kernel virtual address. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) -#define VA_START (UL(0xffffffffffffffff) - \ - (UL(1) << VA_BITS) + 1) -#define PAGE_OFFSET (UL(0xffffffffffffffff) - \ - (UL(1) << (VA_BITS - 1)) + 1) +#define _PAGE_OFFSET(va) (-(UL(1) << (va))) +#define PAGE_OFFSET (_PAGE_OFFSET(VA_BITS)) #define KIMAGE_VADDR (MODULES_END) -#define BPF_JIT_REGION_START (VA_START + KASAN_SHADOW_SIZE) +#define BPF_JIT_REGION_START (KASAN_SHADOW_END) #define BPF_JIT_REGION_SIZE (SZ_128M) #define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (BPF_JIT_REGION_END) #define MODULES_VSIZE (SZ_128M) -#define VMEMMAP_START (PAGE_OFFSET - VMEMMAP_SIZE) +#define VMEMMAP_START (-VMEMMAP_SIZE - SZ_2M) #define PCI_IO_END (VMEMMAP_START - SZ_2M) #define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) #define FIXADDR_TOP (PCI_IO_START - SZ_2M) -#define KERNEL_START _text -#define KERNEL_END _end +#if VA_BITS > 48 +#define VA_BITS_MIN (48) +#else +#define VA_BITS_MIN (VA_BITS) +#endif + +#define _PAGE_END(va) (-(UL(1) << ((va) - 1))) -#ifdef CONFIG_ARM64_USER_VA_BITS_52 +#define KERNEL_START _text +#define KERNEL_END _end + +#ifdef CONFIG_ARM64_VA_BITS_52 #define MAX_USER_VA_BITS 52 #else #define MAX_USER_VA_BITS VA_BITS @@ -68,12 +81,14 @@ * significantly, so double the (minimum) stack size when they are in use. */ #ifdef CONFIG_KASAN -#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT)) +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) +#define KASAN_SHADOW_END ((UL(1) << (64 - KASAN_SHADOW_SCALE_SHIFT)) \ + + KASAN_SHADOW_OFFSET) #define KASAN_THREAD_SHIFT 1 #else -#define KASAN_SHADOW_SIZE (0) #define KASAN_THREAD_SHIFT 0 -#endif +#define KASAN_SHADOW_END (_PAGE_END(VA_BITS_MIN)) +#endif /* CONFIG_KASAN */ #define MIN_THREAD_SHIFT (14 + KASAN_THREAD_SHIFT) @@ -117,14 +132,14 @@ * 16 KB granule: 128 level 3 entries, with contiguous bit * 64 KB granule: 32 level 3 entries, with contiguous bit */ -#define SEGMENT_ALIGN SZ_2M +#define SEGMENT_ALIGN SZ_2M #else /* * 4 KB granule: 16 level 3 entries, with contiguous bit * 16 KB granule: 4 level 3 entries, without contiguous bit * 64 KB granule: 1 level 3 entry */ -#define SEGMENT_ALIGN SZ_64K +#define SEGMENT_ALIGN SZ_64K #endif /* @@ -157,10 +172,13 @@ #endif #ifndef __ASSEMBLY__ +extern u64 vabits_actual; +#define PAGE_END (_PAGE_END(vabits_actual)) #include <linux/bitops.h> #include <linux/mmdebug.h> +extern s64 physvirt_offset; extern s64 memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. 
*/ #define PHYS_OFFSET ({ VM_BUG_ON(memstart_addr & 1); memstart_addr; }) @@ -176,9 +194,6 @@ static inline unsigned long kaslr_offset(void) return kimage_vaddr - KIMAGE_VADDR; } -/* the actual size of a user virtual address */ -extern u64 vabits_user; - /* * Allow all memory at the discovery stage. We will clip it later. */ @@ -201,24 +216,24 @@ extern u64 vabits_user; * pass on to access_ok(), for instance. */ #define untagged_addr(addr) \ - ((__typeof__(addr))sign_extend64((u64)(addr), 55)) + ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) #ifdef CONFIG_KASAN_SW_TAGS #define __tag_shifted(tag) ((u64)(tag) << 56) -#define __tag_set(addr, tag) (__typeof__(addr))( \ - ((u64)(addr) & ~__tag_shifted(0xff)) | __tag_shifted(tag)) #define __tag_reset(addr) untagged_addr(addr) #define __tag_get(addr) (__u8)((u64)(addr) >> 56) #else +#define __tag_shifted(tag) 0UL +#define __tag_reset(addr) (addr) +#define __tag_get(addr) 0 +#endif /* CONFIG_KASAN_SW_TAGS */ + static inline const void *__tag_set(const void *addr, u8 tag) { - return addr; + u64 __addr = (u64)addr & ~__tag_shifted(0xff); + return (const void *)(__addr | __tag_shifted(tag)); } -#define __tag_reset(addr) (addr) -#define __tag_get(addr) 0 -#endif - /* * Physical vs virtual RAM address space conversion. These are * private definitions which should NOT be used outside memory.h @@ -227,19 +242,18 @@ static inline const void *__tag_set(const void *addr, u8 tag) /* - * The linear kernel range starts in the middle of the virtual adddress + * The linear kernel range starts at the bottom of the virtual address * space. Testing the top bit for the start of the region is a - * sufficient check. + * sufficient check and avoids having to worry about the tag. */ -#define __is_lm_address(addr) (!!((addr) & BIT(VA_BITS - 1))) +#define __is_lm_address(addr) (!(((u64)addr) & BIT(vabits_actual - 1))) -#define __lm_to_phys(addr) (((addr) & ~PAGE_OFFSET) + PHYS_OFFSET) +#define __lm_to_phys(addr) (((addr) + physvirt_offset)) #define __kimg_to_phys(addr) ((addr) - kimage_voffset) #define __virt_to_phys_nodebug(x) ({ \ - phys_addr_t __x = (phys_addr_t)(x); \ - __is_lm_address(__x) ? __lm_to_phys(__x) : \ - __kimg_to_phys(__x); \ + phys_addr_t __x = (phys_addr_t)(__tag_reset(x)); \ + __is_lm_address(__x) ? 
__lm_to_phys(__x) : __kimg_to_phys(__x); \ }) #define __pa_symbol_nodebug(x) __kimg_to_phys((phys_addr_t)(x)) @@ -250,9 +264,9 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #else #define __virt_to_phys(x) __virt_to_phys_nodebug(x) #define __phys_addr_symbol(x) __pa_symbol_nodebug(x) -#endif +#endif /* CONFIG_DEBUG_VIRTUAL */ -#define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET) | PAGE_OFFSET) +#define __phys_to_virt(x) ((unsigned long)((x) - physvirt_offset)) #define __phys_to_kimg(x) ((unsigned long)((x) + kimage_voffset)) /* @@ -286,41 +300,38 @@ static inline void *phys_to_virt(phys_addr_t x) #define __pa_nodebug(x) __virt_to_phys_nodebug((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) -#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x))) -#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) +#define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x))) +#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) /* - * virt_to_page(k) convert a _valid_ virtual address to struct page * - * virt_addr_valid(k) indicates whether a virtual address is valid + * virt_to_page(x) convert a _valid_ virtual address to struct page * + * virt_addr_valid(x) indicates whether a virtual address is valid */ #define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) #if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL) -#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) -#define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) +#define virt_to_page(x) pfn_to_page(virt_to_pfn(x)) #else -#define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page)) -#define __page_to_voff(kaddr) (((u64)(kaddr) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) - -#define page_to_virt(page) ({ \ - unsigned long __addr = \ - ((__page_to_voff(page)) | PAGE_OFFSET); \ - const void *__addr_tag = \ - __tag_set((void *)__addr, page_kasan_tag(page)); \ - ((void *)__addr_tag); \ +#define page_to_virt(x) ({ \ + __typeof__(x) __page = x; \ + u64 __idx = ((u64)__page - VMEMMAP_START) / sizeof(struct page);\ + u64 __addr = PAGE_OFFSET + (__idx * PAGE_SIZE); \ + (void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\ }) -#define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) +#define virt_to_page(x) ({ \ + u64 __idx = (__tag_reset((u64)x) - PAGE_OFFSET) / PAGE_SIZE; \ + u64 __addr = VMEMMAP_START + (__idx * sizeof(struct page)); \ + (struct page *)__addr; \ +}) +#endif /* !CONFIG_SPARSEMEM_VMEMMAP || CONFIG_DEBUG_VIRTUAL */ -#define _virt_addr_valid(kaddr) pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \ - + PHYS_OFFSET) >> PAGE_SHIFT) -#endif -#endif +#define virt_addr_valid(addr) ({ \ + __typeof__(addr) __addr = addr; \ + __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr)); \ +}) -#define _virt_addr_is_linear(kaddr) \ - (__tag_reset((u64)(kaddr)) >= PAGE_OFFSET) -#define virt_addr_valid(kaddr) \ - (_virt_addr_is_linear(kaddr) && _virt_addr_valid(kaddr)) +#endif /* !ASSEMBLY */ /* * Given that the GIC architecture permits ITS implementations that can only be @@ -335,4 +346,4 @@ static inline void *phys_to_virt(phys_addr_t x) #include <asm-generic/memory_model.h> -#endif +#endif /* __ASM_MEMORY_H */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index fd6161336653..f217e3292919 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -126,7 
+126,7 @@ extern void init_mem_pgprot(void); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, bool page_mappings_only); -extern void *fixmap_remap_fdt(phys_addr_t dt_phys); +extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot); extern void mark_linear_text_alias_ro(void); #define INIT_MM_CONTEXT(name) \ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 7ed0adb187a8..3827ff4040a3 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -63,7 +63,7 @@ extern u64 idmap_ptrs_per_pgd; static inline bool __cpu_uses_extended_idmap(void) { - if (IS_ENABLED(CONFIG_ARM64_USER_VA_BITS_52)) + if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52)) return false; return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)); @@ -95,7 +95,7 @@ static inline void __cpu_set_tcr_t0sz(unsigned long t0sz) isb(); } -#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)) +#define cpu_set_default_tcr_t0sz() __cpu_set_tcr_t0sz(TCR_T0SZ(vabits_actual)) #define cpu_set_idmap_tcr_t0sz() __cpu_set_tcr_t0sz(idmap_t0sz) /* diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h index 9e690686e8aa..70b323cf8300 100644 --- a/arch/arm64/include/asm/pci.h +++ b/arch/arm64/include/asm/pci.h @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_PCI_H #define __ASM_PCI_H -#ifdef __KERNEL__ #include <linux/types.h> #include <linux/slab.h> @@ -35,5 +34,4 @@ static inline int pci_proc_domain(struct pci_bus *bus) } #endif /* CONFIG_PCI */ -#endif /* __KERNEL__ */ #endif /* __ASM_PCI_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index db92950bb1a0..3df60f97da1f 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -304,7 +304,7 @@ #define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2) #endif -#ifdef CONFIG_ARM64_USER_VA_BITS_52 +#ifdef CONFIG_ARM64_VA_BITS_52 /* Must be at least 64-byte aligned to prevent corruption of the TTBR */ #define TTBR1_BADDR_4852_OFFSET (((UL(1) << (52 - PGDIR_SHIFT)) - \ (UL(1) << (48 - PGDIR_SHIFT))) * 8) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5fdcfe237338..9a8f7e51c2b1 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -21,9 +21,7 @@ * and fixed mappings */ #define VMALLOC_START (MODULES_END) -#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) - -#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)) +#define VMALLOC_END (- PUD_SIZE - VMEMMAP_SIZE - SZ_64K) #define FIRST_USER_ADDRESS 0UL @@ -35,6 +33,8 @@ #include <linux/mm_types.h> #include <linux/sched.h> +extern struct page *vmemmap; + extern void __pte_error(const char *file, int line, unsigned long val); extern void __pmd_error(const char *file, int line, unsigned long val); extern void __pud_error(const char *file, int line, unsigned long val); @@ -220,8 +220,10 @@ static inline void set_pte(pte_t *ptep, pte_t pte) * Only if the new pte is valid and kernel, otherwise TLB maintenance * or update_mmu_cache() have the necessary barriers. 
*/ - if (pte_valid_not_user(pte)) + if (pte_valid_not_user(pte)) { dsb(ishst); + isb(); + } } extern void __sync_icache_dcache(pte_t pteval); @@ -481,8 +483,10 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) WRITE_ONCE(*pmdp, pmd); - if (pmd_valid(pmd)) + if (pmd_valid(pmd)) { dsb(ishst); + isb(); + } } static inline void pmd_clear(pmd_t *pmdp) @@ -540,8 +544,10 @@ static inline void set_pud(pud_t *pudp, pud_t pud) WRITE_ONCE(*pudp, pud); - if (pud_valid(pud)) + if (pud_valid(pud)) { dsb(ishst); + isb(); + } } static inline void pud_clear(pud_t *pudp) @@ -599,6 +605,7 @@ static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) WRITE_ONCE(*pgdp, pgd); dsb(ishst); + isb(); } static inline void pgd_clear(pgd_t *pgdp) @@ -856,8 +863,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) -#define kc_vaddr_to_offset(v) ((v) & ~VA_START) -#define kc_offset_to_vaddr(o) ((o) | VA_START) +#define kc_vaddr_to_offset(v) ((v) & ~PAGE_END) +#define kc_offset_to_vaddr(o) ((o) | PAGE_END) #ifdef CONFIG_ARM64_PA_BITS_52 #define phys_to_ttbr(addr) (((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52) diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h index d328540cb85e..7a24bad1a58b 100644 --- a/arch/arm64/include/asm/pointer_auth.h +++ b/arch/arm64/include/asm/pointer_auth.h @@ -69,7 +69,7 @@ extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg); * The EL0 pointer bits used by a pointer authentication code. * This is dependent on TBI0 being enabled, or bits 63:56 would also apply. */ -#define ptrauth_user_pac_mask() GENMASK(54, vabits_user) +#define ptrauth_user_pac_mask() GENMASK(54, vabits_actual) /* Only valid for EL0 TTBR0 instruction pointers */ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr) diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h index 368d90a9d0e5..a2ce65a0c1fa 100644 --- a/arch/arm64/include/asm/proc-fns.h +++ b/arch/arm64/include/asm/proc-fns.h @@ -9,7 +9,6 @@ #ifndef __ASM_PROCFNS_H #define __ASM_PROCFNS_H -#ifdef __KERNEL__ #ifndef __ASSEMBLY__ #include <asm/page.h> @@ -25,5 +24,4 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); #include <asm/memory.h> #endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ #endif /* __ASM_PROCFNS_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 844e2964b0f5..c67848c55009 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -20,7 +20,6 @@ #define NET_IP_ALIGN 0 #ifndef __ASSEMBLY__ -#ifdef __KERNEL__ #include <linux/build_bug.h> #include <linux/cache.h> @@ -42,8 +41,8 @@ * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. 
*/ -#define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS) -#define TASK_SIZE_64 (UL(1) << vabits_user) +#define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS_MIN) +#define TASK_SIZE_64 (UL(1) << vabits_actual) #ifdef CONFIG_COMPAT #if defined(CONFIG_ARM64_64K_PAGES) && defined(CONFIG_KUSER_HELPERS) @@ -283,8 +282,6 @@ static inline void spin_lock_prefetch(const void *ptr) #define HAVE_ARCH_PICK_MMAP_LAYOUT -#endif - extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ extern void __init minsigstksz_setup(void); @@ -306,6 +303,14 @@ extern void __init minsigstksz_setup(void); /* PR_PAC_RESET_KEYS prctl */ #define PAC_RESET_KEYS(tsk, arg) ptrauth_prctl_reset_keys(tsk, arg) +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI +/* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */ +long set_tagged_addr_ctrl(unsigned long arg); +long get_tagged_addr_ctrl(void); +#define SET_TAGGED_ADDR_CTRL(arg) set_tagged_addr_ctrl(arg) +#define GET_TAGGED_ADDR_CTRL() get_tagged_addr_ctrl() +#endif + /* * For CONFIG_GCC_PLUGIN_STACKLEAK * diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 1dcf63a9ac1f..fbebb411ae20 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -301,6 +301,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->regs[0]; } +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->regs[0] = rc; +} + /** * regs_get_kernel_argument() - get Nth function argument in kernel * @regs: pt_regs of that context diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h index bd43d1cf724b..7e9f163d02ec 100644 --- a/arch/arm64/include/asm/signal32.h +++ b/arch/arm64/include/asm/signal32.h @@ -5,7 +5,6 @@ #ifndef __ASM_SIGNAL32_H #define __ASM_SIGNAL32_H -#ifdef __KERNEL__ #ifdef CONFIG_COMPAT #include <linux/compat.h> @@ -79,5 +78,4 @@ static inline void compat_setup_restart_syscall(struct pt_regs *regs) { } #endif /* CONFIG_COMPAT */ -#endif /* __KERNEL__ */ #endif /* __ASM_SIGNAL32_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 06ebcfef73df..972d196c7714 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -212,6 +212,9 @@ #define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0) #define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) +#define SYS_PAR_EL1_F BIT(1) +#define SYS_PAR_EL1_FST GENMASK(6, 1) + /*** Statistical Profiling Extension ***/ /* ID registers */ #define SYS_PMSIDR_EL1 sys_reg(3, 0, 9, 9, 7) @@ -499,28 +502,11 @@ #define SCTLR_EL2_RES1 ((BIT(4)) | (BIT(5)) | (BIT(11)) | (BIT(16)) | \ (BIT(18)) | (BIT(22)) | (BIT(23)) | (BIT(28)) | \ (BIT(29))) -#define SCTLR_EL2_RES0 ((BIT(6)) | (BIT(7)) | (BIT(8)) | (BIT(9)) | \ - (BIT(10)) | (BIT(13)) | (BIT(14)) | (BIT(15)) | \ - (BIT(17)) | (BIT(20)) | (BIT(24)) | (BIT(26)) | \ - (BIT(27)) | (BIT(30)) | (BIT(31)) | \ - (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL2 SCTLR_ELx_EE -#define ENDIAN_CLEAR_EL2 0 #else #define ENDIAN_SET_EL2 0 -#define ENDIAN_CLEAR_EL2 SCTLR_ELx_EE -#endif - -/* SCTLR_EL2 value used for the hyp-stub */ -#define SCTLR_EL2_SET (SCTLR_ELx_IESB | ENDIAN_SET_EL2 | SCTLR_EL2_RES1) -#define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ - SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ - SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) - -#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffffUL -#error "Inconsistent SCTLR_EL2 set/clear bits" #endif /* SCTLR_EL1 specific flags. 
*/ @@ -539,16 +525,11 @@ #define SCTLR_EL1_RES1 ((BIT(11)) | (BIT(20)) | (BIT(22)) | (BIT(28)) | \ (BIT(29))) -#define SCTLR_EL1_RES0 ((BIT(6)) | (BIT(10)) | (BIT(13)) | (BIT(17)) | \ - (BIT(27)) | (BIT(30)) | (BIT(31)) | \ - (0xffffefffUL << 32)) #ifdef CONFIG_CPU_BIG_ENDIAN #define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) -#define ENDIAN_CLEAR_EL1 0 #else #define ENDIAN_SET_EL1 0 -#define ENDIAN_CLEAR_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) #endif #define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\ @@ -556,13 +537,6 @@ SCTLR_EL1_DZE | SCTLR_EL1_UCT |\ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) -#define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ - SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ - SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI | SCTLR_EL1_RES0) - -#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffffUL -#error "Inconsistent SCTLR_EL1 set/clear bits" -#endif /* id_aa64isar0 */ #define ID_AA64ISAR0_TS_SHIFT 52 diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 180b34ec5965..f0cec4160136 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -8,8 +8,6 @@ #ifndef __ASM_THREAD_INFO_H #define __ASM_THREAD_INFO_H -#ifdef __KERNEL__ - #include <linux/compiler.h> #ifndef __ASSEMBLY__ @@ -59,29 +57,18 @@ void arch_release_task_struct(struct task_struct *tsk); #endif -/* - * thread information flags: - * TIF_SYSCALL_TRACE - syscall trace active - * TIF_SYSCALL_TRACEPOINT - syscall tracepoint for ftrace - * TIF_SYSCALL_AUDIT - syscall auditing - * TIF_SECCOMP - syscall secure computing - * TIF_SYSCALL_EMU - syscall emulation active - * TIF_SIGPENDING - signal pending - * TIF_NEED_RESCHED - rescheduling necessary - * TIF_NOTIFY_RESUME - callback before returning to user - */ -#define TIF_SIGPENDING 0 -#define TIF_NEED_RESCHED 1 +#define TIF_SIGPENDING 0 /* signal pending */ +#define TIF_NEED_RESCHED 1 /* rescheduling necessary */ #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ #define TIF_FSCHECK 5 /* Check FS is USER_DS on return */ #define TIF_NOHZ 7 -#define TIF_SYSCALL_TRACE 8 -#define TIF_SYSCALL_AUDIT 9 -#define TIF_SYSCALL_TRACEPOINT 10 -#define TIF_SECCOMP 11 -#define TIF_SYSCALL_EMU 12 +#define TIF_SYSCALL_TRACE 8 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ +#define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ +#define TIF_SECCOMP 11 /* syscall secure computing */ +#define TIF_SYSCALL_EMU 12 /* syscall emulation active */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 #define TIF_RESTORE_SIGMASK 20 @@ -90,6 +77,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_SVE 23 /* Scalable Vector Extension in use */ #define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */ #define TIF_SSBD 25 /* Wants SSB mitigation */ +#define TIF_TAGGED_ADDR 26 /* Allow tagged user addresses */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) @@ -121,5 +109,4 @@ void arch_release_task_struct(struct task_struct *tsk); .addr_limit = KERNEL_DS, \ } -#endif /* __KERNEL__ */ #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 8af7a85f76bd..bc3949064725 
100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -251,6 +251,7 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) dsb(ishst); __tlbi(vaae1is, addr); dsb(ish); + isb(); } #endif diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 5a1c32260c1f..097d6bfac0b7 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -62,6 +62,10 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si { unsigned long ret, limit = current_thread_info()->addr_limit; + if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI) && + test_thread_flag(TIF_TAGGED_ADDR)) + addr = untagged_addr(addr); + __chk_user_ptr(addr); asm volatile( // A + B <= C + 1 for all A,B,C, in four easy steps: @@ -215,7 +219,8 @@ static inline void uaccess_enable_not_uao(void) /* * Sanitise a uaccess pointer such that it becomes NULL if above the - * current addr_limit. + * current addr_limit. In case the pointer is tagged (has the top byte set), + * untag the pointer before checking. */ #define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr) static inline void __user *__uaccess_mask_ptr(const void __user *ptr) @@ -223,10 +228,11 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) void __user *safe_ptr; asm volatile( - " bics xzr, %1, %2\n" + " bics xzr, %3, %2\n" " csel %0, %1, xzr, eq\n" : "=&r" (safe_ptr) - : "r" (ptr), "r" (current_thread_info()->addr_limit) + : "r" (ptr), "r" (current_thread_info()->addr_limit), + "r" (untagged_addr(ptr)) : "cc"); csdb(); diff --git a/arch/arm64/include/asm/vdso.h b/arch/arm64/include/asm/vdso.h index 9c15e0a06301..07468428fd29 100644 --- a/arch/arm64/include/asm/vdso.h +++ b/arch/arm64/include/asm/vdso.h @@ -5,8 +5,6 @@ #ifndef __ASM_VDSO_H #define __ASM_VDSO_H -#ifdef __KERNEL__ - /* * Default link address for the vDSO. * Since we randomise the VDSO mapping, there's little point in trying @@ -28,6 +26,4 @@ #endif /* !__ASSEMBLY__ */ -#endif /* __KERNEL__ */ - #endif /* __ASM_VDSO_H */ diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h index ba6dbc3de864..1f38bf330a6e 100644 --- a/arch/arm64/include/asm/vdso_datapage.h +++ b/arch/arm64/include/asm/vdso_datapage.h @@ -5,8 +5,6 @@ #ifndef __ASM_VDSO_DATAPAGE_H #define __ASM_VDSO_DATAPAGE_H -#ifdef __KERNEL__ - #ifndef __ASSEMBLY__ struct vdso_data { @@ -32,6 +30,4 @@ struct vdso_data { #endif /* !__ASSEMBLY__ */ -#endif /* __KERNEL__ */ - #endif /* __ASM_VDSO_DATAPAGE_H */ diff --git a/arch/arm64/include/uapi/asm/stat.h b/arch/arm64/include/uapi/asm/stat.h deleted file mode 100644 index 313325fa22fa..000000000000 --- a/arch/arm64/include/uapi/asm/stat.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ -#include <asm-generic/stat.h> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d19d14ba9ae4..95201e5ff5e1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -886,7 +886,7 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _ u32 midr = read_cpuid_id(); /* Cavium ThunderX pass 1.x and 2.x */ - return MIDR_IS_CPU_MODEL_RANGE(midr, MIDR_THUNDERX, + return midr_is_cpu_model_range(midr, MIDR_THUNDERX, MIDR_CPU_VAR_REV(0, 0), MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK)); } diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index d1048173fd8a..e4d6af2fdec7 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -11,6 +11,7 @@ #include <linux/cpu_pm.h> #include <linux/of.h> #include <linux/of_device.h> +#include <linux/psci.h> #include <asm/cpuidle.h> #include <asm/cpu_ops.h> @@ -46,17 +47,58 @@ int arm_cpuidle_suspend(int index) #define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags)) +static int psci_acpi_cpu_init_idle(unsigned int cpu) +{ + int i, count; + struct acpi_lpi_state *lpi; + struct acpi_processor *pr = per_cpu(processors, cpu); + + /* + * If the PSCI cpu_suspend function hook has not been initialized + * idle states must not be enabled, so bail out + */ + if (!psci_ops.cpu_suspend) + return -EOPNOTSUPP; + + if (unlikely(!pr || !pr->flags.has_lpi)) + return -EINVAL; + + count = pr->power.count - 1; + if (count <= 0) + return -ENODEV; + + for (i = 0; i < count; i++) { + u32 state; + + lpi = &pr->power.lpi_states[i + 1]; + /* + * Only bits[31:0] represent a PSCI power_state while + * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification + */ + state = lpi->address; + if (!psci_power_state_is_valid(state)) { + pr_warn("Invalid PSCI power state %#x\n", state); + return -EINVAL; + } + } + + return 0; +} + int acpi_processor_ffh_lpi_probe(unsigned int cpu) { - return arm_cpuidle_init(cpu); + return psci_acpi_cpu_init_idle(cpu); } int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi) { + u32 state = lpi->address; + if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags)) - return CPU_PM_CPU_IDLE_ENTER_RETENTION(arm_cpuidle_suspend, - lpi->index); + return CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM(psci_cpu_suspend_enter, + lpi->index, state); else - return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, lpi->index); + return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, + lpi->index, state); } #endif diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 876055e37352..05933c065732 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; -static char *icache_policy_str[] = { +static const char *icache_policy_str[] = { [0 ... ICACHE_POLICY_PIPT] = "RESERVED/UNKNOWN", [ICACHE_POLICY_VIPT] = "VIPT", [ICACHE_POLICY_PIPT] = "PIPT", diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 320a30dbe35e..84a822748c84 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,9 +30,9 @@ * Context tracking subsystem. Used to instrument transitions * between user and kernel mode. 
*/ - .macro ct_user_exit + .macro ct_user_exit_irqoff #ifdef CONFIG_CONTEXT_TRACKING - bl context_tracking_user_exit + bl enter_from_user_mode #endif .endm @@ -792,8 +792,8 @@ el0_cp15: /* * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions */ + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, x25 mov x1, sp bl do_cp15instr @@ -805,8 +805,8 @@ el0_da: * Data abort handling */ mrs x26, far_el1 + ct_user_exit_irqoff enable_daif - ct_user_exit clear_address_tag x0, x26 mov x1, x25 mov x2, sp @@ -818,11 +818,11 @@ el0_ia: */ mrs x26, far_el1 gic_prio_kentry_setup tmp=x0 + ct_user_exit_irqoff enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif - ct_user_exit mov x0, x26 mov x1, x25 mov x2, sp @@ -832,8 +832,8 @@ el0_fpsimd_acc: /* * Floating Point or Advanced SIMD access */ + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, x25 mov x1, sp bl do_fpsimd_acc @@ -842,8 +842,8 @@ el0_sve_acc: /* * Scalable Vector Extension access */ + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, x25 mov x1, sp bl do_sve_acc @@ -852,8 +852,8 @@ el0_fpsimd_exc: /* * Floating Point, Advanced SIMD or SVE exception */ + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, x25 mov x1, sp bl do_fpsimd_exc @@ -868,11 +868,11 @@ el0_sp_pc: * Stack or PC alignment exception handling */ gic_prio_kentry_setup tmp=x0 + ct_user_exit_irqoff enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif - ct_user_exit mov x0, x26 mov x1, x25 mov x2, sp @@ -882,8 +882,8 @@ el0_undef: /* * Undefined instruction */ + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, sp bl do_undefinstr b ret_to_user @@ -891,8 +891,8 @@ el0_sys: /* * System instructions, for trapped cache maintenance instructions */ + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, x25 mov x1, sp bl do_sysinstr @@ -902,17 +902,18 @@ el0_dbg: * Debug exception handling */ tbnz x24, #0, el0_inv // EL0 only + mrs x24, far_el1 gic_prio_kentry_setup tmp=x3 - mrs x0, far_el1 + ct_user_exit_irqoff + mov x0, x24 mov x1, x25 mov x2, sp bl do_debug_exception enable_da_f - ct_user_exit b ret_to_user el0_inv: + ct_user_exit_irqoff enable_daif - ct_user_exit mov x0, sp mov x1, #BAD_SYNC mov x2, x25 @@ -925,13 +926,13 @@ el0_irq: kernel_entry 0 el0_irq_naked: gic_prio_irq_setup pmr=x20, tmp=x0 + ct_user_exit_irqoff enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif - ct_user_exit #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR tbz x22, #55, 1f bl do_el0_irq_bp_hardening @@ -958,13 +959,14 @@ ENDPROC(el1_error) el0_error: kernel_entry 0 el0_error_naked: - mrs x1, esr_el1 + mrs x25, esr_el1 gic_prio_kentry_setup tmp=x2 + ct_user_exit_irqoff enable_dbg mov x0, sp + mov x1, x25 bl do_serror enable_da_f - ct_user_exit b ret_to_user ENDPROC(el0_error) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2cdacd1c141b..989b1944cb71 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -102,6 +102,8 @@ pe_header: * x23 stext() .. start_kernel() physical misalignment/KASLR offset * x28 __create_page_tables() callee preserved temp register * x19/x20 __primary_switch() callee preserved temp registers + * x24 __primary_switch() .. 
relocate_kernel() + * current RELR displacement */ ENTRY(stext) bl preserve_boot_args @@ -308,15 +310,15 @@ __create_page_tables: adrp x0, idmap_pg_dir adrp x3, __idmap_text_start // __pa(__idmap_text_start) -#ifdef CONFIG_ARM64_USER_VA_BITS_52 +#ifdef CONFIG_ARM64_VA_BITS_52 mrs_s x6, SYS_ID_AA64MMFR2_EL1 and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT) mov x5, #52 cbnz x6, 1f #endif - mov x5, #VA_BITS + mov x5, #VA_BITS_MIN 1: - adr_l x6, vabits_user + adr_l x6, vabits_actual str x5, [x6] dmb sy dc ivac, x6 // Invalidate potentially stale cache line @@ -724,14 +726,22 @@ __secondary_switched: adr_l x0, secondary_data ldr x1, [x0, #CPU_BOOT_STACK] // get secondary_data.stack + cbz x1, __secondary_too_slow mov sp, x1 ldr x2, [x0, #CPU_BOOT_TASK] + cbz x2, __secondary_too_slow msr sp_el0, x2 mov x29, #0 mov x30, #0 b secondary_start_kernel ENDPROC(__secondary_switched) +__secondary_too_slow: + wfe + wfi + b __secondary_too_slow +ENDPROC(__secondary_too_slow) + /* * The booting CPU updates the failed status @__early_cpu_boot_status, * with MMU turned off. @@ -772,7 +782,7 @@ ENTRY(__enable_mmu) phys_to_ttbr x1, x1 phys_to_ttbr x2, x2 msr ttbr0_el1, x2 // load TTBR0 - offset_ttbr1 x1 + offset_ttbr1 x1, x3 msr ttbr1_el1, x1 // load TTBR1 isb msr sctlr_el1, x0 @@ -789,8 +799,8 @@ ENTRY(__enable_mmu) ENDPROC(__enable_mmu) ENTRY(__cpu_secondary_check52bitva) -#ifdef CONFIG_ARM64_USER_VA_BITS_52 - ldr_l x0, vabits_user +#ifdef CONFIG_ARM64_VA_BITS_52 + ldr_l x0, vabits_actual cmp x0, #52 b.ne 2f @@ -834,14 +844,93 @@ __relocate_kernel: 0: cmp x9, x10 b.hs 1f - ldp x11, x12, [x9], #24 - ldr x13, [x9, #-8] - cmp w12, #R_AARCH64_RELATIVE + ldp x12, x13, [x9], #24 + ldr x14, [x9, #-8] + cmp w13, #R_AARCH64_RELATIVE b.ne 0b - add x13, x13, x23 // relocate - str x13, [x11, x23] + add x14, x14, x23 // relocate + str x14, [x12, x23] b 0b -1: ret + +1: +#ifdef CONFIG_RELR + /* + * Apply RELR relocations. + * + * RELR is a compressed format for storing relative relocations. The + * encoded sequence of entries looks like: + * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ] + * + * i.e. start with an address, followed by any number of bitmaps. The + * address entry encodes 1 relocation. The subsequent bitmap entries + * encode up to 63 relocations each, at subsequent offsets following + * the last address entry. + * + * The bitmap entries must have 1 in the least significant bit. The + * assumption here is that an address cannot have 1 in lsb. Odd + * addresses are not supported. Any odd addresses are stored in the RELA + * section, which is handled above. + * + * Excluding the least significant bit in the bitmap, each non-zero + * bit in the bitmap represents a relocation to be applied to + * a corresponding machine word that follows the base address + * word. The second least significant bit represents the machine + * word immediately following the initial address, and each bit + * that follows represents the next word, in linear order. As such, + * a single bitmap can encode up to 63 relocations in a 64-bit object. + * + * In this implementation we store the address of the next RELR table + * entry in x9, the address being relocated by the current address or + * bitmap entry in x13 and the address being relocated by the current + * bit in x14. + * + * Because addends are stored in place in the binary, RELR relocations + * cannot be applied idempotently. 
We use x24 to keep track of the + * currently applied displacement so that we can correctly relocate if + * __relocate_kernel is called twice with non-zero displacements (i.e. + * if there is both a physical misalignment and a KASLR displacement). + */ + ldr w9, =__relr_offset // offset to reloc table + ldr w10, =__relr_size // size of reloc table + add x9, x9, x11 // __va(.relr) + add x10, x9, x10 // __va(.relr) + sizeof(.relr) + + sub x15, x23, x24 // delta from previous offset + cbz x15, 7f // nothing to do if unchanged + mov x24, x23 // save new offset + +2: cmp x9, x10 + b.hs 7f + ldr x11, [x9], #8 + tbnz x11, #0, 3f // branch to handle bitmaps + add x13, x11, x23 + ldr x12, [x13] // relocate address entry + add x12, x12, x15 + str x12, [x13], #8 // adjust to start of bitmap + b 2b + +3: mov x14, x13 +4: lsr x11, x11, #1 + cbz x11, 6f + tbz x11, #0, 5f // skip bit if not set + ldr x12, [x14] // relocate bit + add x12, x12, x15 + str x12, [x14] + +5: add x14, x14, #8 // move to next bit's address + b 4b + +6: /* + * Move to the next bitmap's address. 8 is the word size, and 63 is the + * number of significant bits in a bitmap entry. + */ + add x13, x13, #(8 * 63) + b 2b + +7: +#endif + ret + ENDPROC(__relocate_kernel) #endif @@ -854,6 +943,9 @@ __primary_switch: adrp x1, init_pg_dir bl __enable_mmu #ifdef CONFIG_RELOCATABLE +#ifdef CONFIG_RELR + mov x24, #0 // no RELR displacement yet +#endif bl __relocate_kernel #ifdef CONFIG_RANDOMIZE_BASE ldr x8, =__primary_switched diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 2f4a2ce7264b..38bcd4d4e43b 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -22,14 +22,14 @@ * Even switching to our copied tables will cause a changed output address at * each stage of the walk. */ -.macro break_before_make_ttbr_switch zero_page, page_table, tmp +.macro break_before_make_ttbr_switch zero_page, page_table, tmp, tmp2 phys_to_ttbr \tmp, \zero_page msr ttbr1_el1, \tmp isb tlbi vmalle1 dsb nsh phys_to_ttbr \tmp, \page_table - offset_ttbr1 \tmp + offset_ttbr1 \tmp, \tmp2 msr ttbr1_el1, \tmp isb .endm @@ -70,7 +70,7 @@ ENTRY(swsusp_arch_suspend_exit) * We execute from ttbr0, change ttbr1 to our copied linear map tables * with a break-before-make via the zero page */ - break_before_make_ttbr_switch x5, x0, x6 + break_before_make_ttbr_switch x5, x0, x6, x8 mov x21, x1 mov x30, x2 @@ -101,7 +101,7 @@ ENTRY(swsusp_arch_suspend_exit) dsb ish /* wait for PoU cleaning to finish */ /* switch to the restored kernels page tables */ - break_before_make_ttbr_switch x25, x21, x6 + break_before_make_ttbr_switch x25, x21, x6, x8 ic ialluis dsb ish diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 9341fcc6e809..e0a7fce0e01c 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -496,7 +496,7 @@ int swsusp_arch_resume(void) rc = -ENOMEM; goto out; } - rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0); + rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, PAGE_END); if (rc) goto out; diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h new file mode 100644 index 000000000000..25a2a9b479c2 --- /dev/null +++ b/arch/arm64/kernel/image-vars.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Linker script variables to be set after section resolution, as + * ld.lld does not like variables assigned before SECTIONS is processed. 
+ */ +#ifndef __ARM64_KERNEL_IMAGE_VARS_H +#define __ARM64_KERNEL_IMAGE_VARS_H + +#ifndef LINKER_SCRIPT +#error This file should only be included in vmlinux.lds.S +#endif + +#ifdef CONFIG_EFI + +__efistub_stext_offset = stext - _text; + +/* + * The EFI stub has its own symbol namespace prefixed by __efistub_, to + * isolate it from the kernel proper. The following symbols are legally + * accessed by the stub, so provide some aliases to make them accessible. + * Only include data symbols here, or text symbols of functions that are + * guaranteed to be safe when executed at another offset than they were + * linked at. The routines below are all implemented in assembler in a + * position independent manner + */ +__efistub_memcmp = __pi_memcmp; +__efistub_memchr = __pi_memchr; +__efistub_memcpy = __pi_memcpy; +__efistub_memmove = __pi_memmove; +__efistub_memset = __pi_memset; +__efistub_strlen = __pi_strlen; +__efistub_strnlen = __pi_strnlen; +__efistub_strcmp = __pi_strcmp; +__efistub_strncmp = __pi_strncmp; +__efistub_strrchr = __pi_strrchr; +__efistub___flush_dcache_area = __pi___flush_dcache_area; + +#ifdef CONFIG_KASAN +__efistub___memcpy = __pi_memcpy; +__efistub___memmove = __pi_memmove; +__efistub___memset = __pi_memset; +#endif + +__efistub__text = _text; +__efistub__end = _end; +__efistub__edata = _edata; +__efistub_screen_info = screen_info; + +#endif + +#endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h index 2b85c0d6fa3d..c7d38c660372 100644 --- a/arch/arm64/kernel/image.h +++ b/arch/arm64/kernel/image.h @@ -65,46 +65,4 @@ DEFINE_IMAGE_LE64(_kernel_offset_le, TEXT_OFFSET); \ DEFINE_IMAGE_LE64(_kernel_flags_le, __HEAD_FLAGS); -#ifdef CONFIG_EFI - -/* - * Use ABSOLUTE() to avoid ld.lld treating this as a relative symbol: - * https://github.com/ClangBuiltLinux/linux/issues/561 - */ -__efistub_stext_offset = ABSOLUTE(stext - _text); - -/* - * The EFI stub has its own symbol namespace prefixed by __efistub_, to - * isolate it from the kernel proper. The following symbols are legally - * accessed by the stub, so provide some aliases to make them accessible. - * Only include data symbols here, or text symbols of functions that are - * guaranteed to be safe when executed at another offset than they were - * linked at. 
The routines below are all implemented in assembler in a - * position independent manner - */ -__efistub_memcmp = __pi_memcmp; -__efistub_memchr = __pi_memchr; -__efistub_memcpy = __pi_memcpy; -__efistub_memmove = __pi_memmove; -__efistub_memset = __pi_memset; -__efistub_strlen = __pi_strlen; -__efistub_strnlen = __pi_strnlen; -__efistub_strcmp = __pi_strcmp; -__efistub_strncmp = __pi_strncmp; -__efistub_strrchr = __pi_strrchr; -__efistub___flush_dcache_area = __pi___flush_dcache_area; - -#ifdef CONFIG_KASAN -__efistub___memcpy = __pi_memcpy; -__efistub___memmove = __pi_memmove; -__efistub___memset = __pi_memset; -#endif - -__efistub__text = _text; -__efistub__end = _end; -__efistub__edata = _edata; -__efistub_screen_info = screen_info; - -#endif - #endif /* __ARM64_KERNEL_IMAGE_H */ diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 84b059ed04fc..d801a7094076 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -26,7 +26,7 @@ #define AARCH64_INSN_N_BIT BIT(22) #define AARCH64_INSN_LSL_12 BIT(22) -static int aarch64_insn_encoding_class[] = { +static const int aarch64_insn_encoding_class[] = { AARCH64_INSN_CLS_UNKNOWN, AARCH64_INSN_CLS_UNKNOWN, AARCH64_INSN_CLS_UNKNOWN, diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 708051655ad9..416f537bf614 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -62,9 +62,6 @@ out: return default_cmdline; } -extern void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, - pgprot_t prot); - /* * This routine will be executed with the kernel mapped at its default virtual * address, and if it returns successfully, the kernel will be remapped, and @@ -93,7 +90,7 @@ u64 __init kaslr_early_init(u64 dt_phys) * attempt at mapping the FDT in setup_machine() */ early_fixmap_init(); - fdt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); + fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); if (!fdt) return 0; @@ -116,15 +113,15 @@ u64 __init kaslr_early_init(u64 dt_phys) /* * OK, so we are proceeding with KASLR enabled. Calculate a suitable * kernel image offset from the seed. Let's place the kernel in the - * middle half of the VMALLOC area (VA_BITS - 2), and stay clear of + * middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of * the lower and upper quarters to avoid colliding with other * allocations. 
* Even if we could randomize at page granularity for 16k and 64k pages, * let's always round to 2 MB so we don't interfere with the ability to * map using contiguous PTEs */ - mask = ((1UL << (VA_BITS - 2)) - 1) & ~(SZ_2M - 1); - offset = BIT(VA_BITS - 3) + (seed & mask); + mask = ((1UL << (VA_BITS_MIN - 2)) - 1) & ~(SZ_2M - 1); + offset = BIT(VA_BITS_MIN - 3) + (seed & mask); /* use the top 16 bits to randomize the linear region */ memstart_offset_seed = seed >> 48; diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 2514fd6f12cb..29a9428486a5 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -84,7 +84,7 @@ static void *image_load(struct kimage *image, kbuf.buffer = kernel; kbuf.bufsz = kernel_len; - kbuf.mem = 0; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = le64_to_cpu(h->image_size); text_offset = le64_to_cpu(h->text_offset); kbuf.buf_align = MIN_KIMG_ALIGN; diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 58871333737a..7b08bf9499b6 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -27,6 +27,8 @@ #define FDT_PROP_INITRD_END "linux,initrd-end" #define FDT_PROP_BOOTARGS "bootargs" #define FDT_PROP_KASLR_SEED "kaslr-seed" +#define FDT_PROP_RNG_SEED "rng-seed" +#define RNG_SEED_SIZE 128 const struct kexec_file_ops * const kexec_file_loaders[] = { &kexec_image_ops, @@ -102,6 +104,19 @@ static int setup_dtb(struct kimage *image, FDT_PROP_KASLR_SEED); } + /* add rng-seed */ + if (rng_is_initialized()) { + u8 rng_seed[RNG_SEED_SIZE]; + get_random_bytes(rng_seed, RNG_SEED_SIZE); + ret = fdt_setprop(dtb, off, FDT_PROP_RNG_SEED, rng_seed, + RNG_SEED_SIZE); + if (ret) + goto out; + } else { + pr_notice("RNG is not initialised: omitting \"%s\" property\n", + FDT_PROP_RNG_SEED); + } + out: if (ret) return (ret == -FDT_ERR_NOSPACE) ? -ENOMEM : -EINVAL; @@ -110,7 +125,8 @@ out: } /* - * More space needed so that we can add initrd, bootargs and kaslr-seed. + * More space needed so that we can add initrd, bootargs, kaslr-seed, and + * rng-seed. 
*/ #define DTB_EXTRA_SPACE 0x1000 @@ -177,7 +193,7 @@ int load_other_segments(struct kimage *image, if (initrd) { kbuf.buffer = initrd; kbuf.bufsz = initrd_len; - kbuf.mem = 0; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = initrd_len; kbuf.buf_align = 0; /* within 1GB-aligned window of up to 32GB in size */ @@ -204,7 +220,7 @@ int load_other_segments(struct kimage *image, dtb_len = fdt_totalsize(dtb); kbuf.buffer = dtb; kbuf.bufsz = dtb_len; - kbuf.mem = 0; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; kbuf.memsz = dtb_len; /* not across 2MB boundary */ kbuf.buf_align = SZ_2M; diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index 044c0ae4d6c8..b182442b87a3 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -302,7 +302,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, /* sort by type, symbol index and addend */ sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL); - if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0) + if (!str_has_prefix(secstrings + dstsec->sh_name, ".init")) core_plts += count_plts(syms, rels, numrels, sechdrs[i].sh_info, dstsec); else diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 96e90e270042..a0b4f1bca491 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -19,6 +19,7 @@ #include <linux/of.h> #include <linux/perf/arm_pmu.h> #include <linux/platform_device.h> +#include <linux/smp.h> /* ARMv8 Cortex-A53 specific event types. */ #define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2 @@ -157,7 +158,6 @@ armv8pmu_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%03llx\n", pmu_attr->id); } -#define ARMV8_EVENT_ATTR_RESOLVE(m) #m #define ARMV8_EVENT_ATTR(name, config) \ PMU_EVENT_ATTR(name, armv8_event_attr_##name, \ config, armv8pmu_events_sysfs_show) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index f674f28df663..03689c0beb34 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <linux/mm.h> #include <linux/stddef.h> +#include <linux/sysctl.h> #include <linux/unistd.h> #include <linux/user.h> #include <linux/delay.h> @@ -38,6 +39,7 @@ #include <trace/events/power.h> #include <linux/percpu.h> #include <linux/thread_info.h> +#include <linux/prctl.h> #include <asm/alternative.h> #include <asm/arch_gicv3.h> @@ -307,11 +309,18 @@ static void tls_thread_flush(void) } } +static void flush_tagged_addr_state(void) +{ + if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI)) + clear_thread_flag(TIF_TAGGED_ADDR); +} + void flush_thread(void) { fpsimd_flush_thread(); tls_thread_flush(); flush_ptrace_hw_breakpoint(current); + flush_tagged_addr_state(); } void release_thread(struct task_struct *dead_task) @@ -565,3 +574,70 @@ void arch_setup_new_exec(void) ptrauth_thread_init_user(current); } + +#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI +/* + * Control the relaxed ABI allowing tagged user addresses into the kernel. + */ +static unsigned int tagged_addr_disabled; + +long set_tagged_addr_ctrl(unsigned long arg) +{ + if (is_compat_task()) + return -EINVAL; + if (arg & ~PR_TAGGED_ADDR_ENABLE) + return -EINVAL; + + /* + * Do not allow the enabling of the tagged address ABI if globally + * disabled via sysctl abi.tagged_addr_disabled. 
+ */ + if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled) + return -EINVAL; + + update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE); + + return 0; +} + +long get_tagged_addr_ctrl(void) +{ + if (is_compat_task()) + return -EINVAL; + + if (test_thread_flag(TIF_TAGGED_ADDR)) + return PR_TAGGED_ADDR_ENABLE; + + return 0; +} + +/* + * Global sysctl to disable the tagged user addresses support. This control + * only prevents the tagged address ABI enabling via prctl() and does not + * disable it for tasks that already opted in to the relaxed ABI. + */ +static int zero; +static int one = 1; + +static struct ctl_table tagged_addr_sysctl_table[] = { + { + .procname = "tagged_addr_disabled", + .mode = 0644, + .data = &tagged_addr_disabled, + .maxlen = sizeof(int), + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { } +}; + +static int __init tagged_addr_init(void) +{ + if (!register_sysctl("abi", tagged_addr_sysctl_table)) + return -EINVAL; + return 0; +} + +core_initcall(tagged_addr_init); +#endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */ diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 85ee7d07889e..c9f72b2665f1 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -46,6 +46,11 @@ static int cpu_psci_cpu_boot(unsigned int cpu) } #ifdef CONFIG_HOTPLUG_CPU +static bool cpu_psci_cpu_can_disable(unsigned int cpu) +{ + return !psci_tos_resident_on(cpu); +} + static int cpu_psci_cpu_disable(unsigned int cpu) { /* Fail early if we don't have CPU_OFF support */ @@ -105,14 +110,11 @@ static int cpu_psci_cpu_kill(unsigned int cpu) const struct cpu_operations cpu_psci_ops = { .name = "psci", -#ifdef CONFIG_CPU_IDLE - .cpu_init_idle = psci_cpu_init_idle, - .cpu_suspend = psci_cpu_suspend_enter, -#endif .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, #ifdef CONFIG_HOTPLUG_CPU + .cpu_can_disable = cpu_psci_cpu_can_disable, .cpu_disable = cpu_psci_cpu_disable, .cpu_die = cpu_psci_cpu_die, .cpu_kill = cpu_psci_cpu_kill, diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 3cf3b135027e..21176d02e21a 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -870,7 +870,7 @@ static int sve_set(struct task_struct *target, goto out; /* - * Apart from PT_SVE_REGS_MASK, all PT_SVE_* flags are consumed by + * Apart from SVE_PT_REGS_MASK, all SVE_PT_* flags are consumed by * sve_set_vector_length(), which will also validate them for us: */ ret = sve_set_vector_length(target, header.vl, diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 9c4bad7d7131..56f664561754 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -170,9 +170,13 @@ static void __init smp_build_mpidr_hash(void) static void __init setup_machine_fdt(phys_addr_t dt_phys) { - void *dt_virt = fixmap_remap_fdt(dt_phys); + int size; + void *dt_virt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); const char *name; + if (dt_virt) + memblock_reserve(dt_phys, size); + if (!dt_virt || !early_init_dt_scan(dt_virt)) { pr_crit("\n" "Error: invalid device tree blob at physical address %pa (virtual address 0x%p)\n" @@ -184,6 +188,9 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) cpu_relax(); } + /* Early fixups are done, map the FDT as read-only now */ + fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); + name = of_flat_dt_get_machine_name(); if (!name) return; @@ -357,6 +364,15 @@ void __init setup_arch(char **cmdline_p) } } +static inline 
bool cpu_can_disable(unsigned int cpu) +{ +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_can_disable) + return cpu_ops[cpu]->cpu_can_disable(cpu); +#endif + return false; +} + static int __init topology_init(void) { int i; @@ -366,7 +382,7 @@ static int __init topology_init(void) for_each_possible_cpu(i) { struct cpu *cpu = &per_cpu(cpu_data.cpu, i); - cpu->hotpluggable = 1; + cpu->hotpluggable = cpu_can_disable(i); register_cpu(cpu, i); } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 018a33e01b0e..dc9fe879c279 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -123,7 +123,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) * time out. */ wait_for_completion_timeout(&cpu_running, - msecs_to_jiffies(1000)); + msecs_to_jiffies(5000)); if (!cpu_online(cpu)) { pr_crit("CPU%u: failed to come online\n", cpu); @@ -136,6 +136,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) secondary_data.task = NULL; secondary_data.stack = NULL; + __flush_dcache_area(&secondary_data, sizeof(secondary_data)); status = READ_ONCE(secondary_data.status); if (ret && status) { @@ -146,6 +147,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) default: pr_err("CPU%u: failed in unknown state : 0x%lx\n", cpu, status); + cpus_stuck_in_kernel++; break; case CPU_KILL_ME: if (!op_cpu_kill(cpu)) { diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 76c2739ba8a4..c8a3fee00c11 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -19,7 +19,7 @@ #include <asm/smp_plat.h> extern void secondary_holding_pen(void); -volatile unsigned long __section(".mmuoff.data.read") +volatile unsigned long __section(.mmuoff.data.read) secondary_holding_pen_release = INVALID_HWID; static phys_addr_t cpu_release_addr[NR_CPUS]; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d3313797cca9..6e950908eb97 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -7,9 +7,11 @@ */ #include <linux/bug.h> +#include <linux/context_tracking.h> #include <linux/signal.h> #include <linux/personality.h> #include <linux/kallsyms.h> +#include <linux/kprobes.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <linux/hardirq.h> @@ -511,7 +513,7 @@ struct sys64_hook { void (*handler)(unsigned int esr, struct pt_regs *regs); }; -static struct sys64_hook sys64_hooks[] = { +static const struct sys64_hook sys64_hooks[] = { { .esr_mask = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK, .esr_val = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL, @@ -636,7 +638,7 @@ static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) arm64_compat_skip_faulting_instruction(regs, 4); } -static struct sys64_hook cp15_32_hooks[] = { +static const struct sys64_hook cp15_32_hooks[] = { { .esr_mask = ESR_ELx_CP15_32_ISS_SYS_MASK, .esr_val = ESR_ELx_CP15_32_ISS_SYS_CNTFRQ, @@ -656,7 +658,7 @@ static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) arm64_compat_skip_faulting_instruction(regs, 4); } -static struct sys64_hook cp15_64_hooks[] = { +static const struct sys64_hook cp15_64_hooks[] = { { .esr_mask = ESR_ELx_CP15_64_ISS_SYS_MASK, .esr_val = ESR_ELx_CP15_64_ISS_SYS_CNTVCT, @@ -667,7 +669,7 @@ static struct sys64_hook cp15_64_hooks[] = { asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) { - struct sys64_hook *hook, *hook_base; + const struct sys64_hook *hook, *hook_base; if (!cp15_cond_valid(esr, regs)) { /* @@ 
-707,7 +709,7 @@ asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) { - struct sys64_hook *hook; + const struct sys64_hook *hook; for (hook = sys64_hooks; hook->handler; hook++) if ((hook->esr_mask & esr) == hook->esr_val) { @@ -743,6 +745,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_SMC64] = "SMC (AArch64)", [ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)", [ESR_ELx_EC_SVE] = "SVE", + [ESR_ELx_EC_ERET] = "ERET/ERETAA/ERETAB", [ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF", [ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)", [ESR_ELx_EC_IABT_CUR] = "IABT (current EL)", @@ -899,6 +902,13 @@ asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) nmi_exit(); } +asmlinkage void enter_from_user_mode(void) +{ + CT_WARN_ON(ct_state() != CONTEXT_USER); + user_exit_irqoff(); +} +NOKPROBE_SYMBOL(enter_from_user_mode); + void __pte_error(const char *file, int line, unsigned long val) { pr_err("%s:%d: bad pte %016lx.\n", file, line, val); diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 7fa008374907..aa76f7259668 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -200,6 +200,15 @@ SECTIONS __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR); __rela_size = SIZEOF(.rela.dyn); +#ifdef CONFIG_RELR + .relr.dyn : ALIGN(8) { + *(.relr.dyn) + } + + __relr_offset = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR); + __relr_size = SIZEOF(.relr.dyn); +#endif + . = ALIGN(SEGMENT_ALIGN); __initdata_end = .; __init_end = .; @@ -245,6 +254,8 @@ SECTIONS HEAD_SYMBOLS } +#include "image-vars.h" + /* * The HYP init code and ID map text can't be longer than a page each, * and should not cross a page boundary. diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index adaf266d8de8..bd978ad71936 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -264,7 +264,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar) tmp = read_sysreg(par_el1); write_sysreg(par, par_el1); - if (unlikely(tmp & 1)) + if (unlikely(tmp & SYS_PAR_EL1_F)) return false; /* Translation failed, back to guest */ /* Convert PAR to HPFAR format */ diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index acd8084f1f2c..2cf7d4b606c3 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -29,25 +29,25 @@ static void compute_layout(void) int kva_msb; /* Where is my RAM region? */ - hyp_va_msb = idmap_addr & BIT(VA_BITS - 1); - hyp_va_msb ^= BIT(VA_BITS - 1); + hyp_va_msb = idmap_addr & BIT(vabits_actual - 1); + hyp_va_msb ^= BIT(vabits_actual - 1); kva_msb = fls64((u64)phys_to_virt(memblock_start_of_DRAM()) ^ (u64)(high_memory - 1)); - if (kva_msb == (VA_BITS - 1)) { + if (kva_msb == (vabits_actual - 1)) { /* * No space in the address, let's compute the mask so - * that it covers (VA_BITS - 1) bits, and the region + * that it covers (vabits_actual - 1) bits, and the region * bit. The tag stays set to zero. */ - va_mask = BIT(VA_BITS - 1) - 1; + va_mask = BIT(vabits_actual - 1) - 1; va_mask |= hyp_va_msb; } else { /* * We do have some free bits to insert a random tag. * Hyp VAs are now created from kernel linear map VAs - * using the following formula (with V == VA_BITS): + * using the following formula (with V == vabits_actual): * * 63 ... V | V-1 | V-2 .. tag_lsb | tag_lsb - 1 .. 
0 * --------------------------------------------------------- @@ -55,7 +55,7 @@ static void compute_layout(void) */ tag_lsb = kva_msb; va_mask = GENMASK_ULL(tag_lsb - 1, 0); - tag_val = get_random_long() & GENMASK_ULL(VA_BITS - 2, tag_lsb); + tag_val = get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); tag_val |= hyp_va_msb; tag_val >>= tag_lsb; } diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 33c2a4abda04..f182ccb0438e 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -33,3 +33,5 @@ UBSAN_SANITIZE_atomic_ll_sc.o := n lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o obj-$(CONFIG_CRC32) += crc32.o + +obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/arm64/lib/error-inject.c b/arch/arm64/lib/error-inject.c new file mode 100644 index 000000000000..ed15021da3ed --- /dev/null +++ b/arch/arm64/lib/error-inject.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/error-injection.h> +#include <linux/kprobes.h> + +void override_function_with_return(struct pt_regs *regs) +{ + /* + * 'regs' represents the state on entry of a predefined function in + * the kernel/module and which is captured on a kprobe. + * + * When kprobe returns back from exception it will override the end + * of probed function and directly return to the predefined + * function's caller. + */ + instruction_pointer_set(regs, procedure_link_pointer(regs)); +} +NOKPROBE_SYMBOL(override_function_with_return); diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 82b3a7fdb4a6..93f9f77582ae 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -25,9 +25,20 @@ #include <asm/pgtable-hwdef.h> #include <asm/ptdump.h> -static const struct addr_marker address_markers[] = { + +enum address_markers_idx { + PAGE_OFFSET_NR = 0, + PAGE_END_NR, #ifdef CONFIG_KASAN - { KASAN_SHADOW_START, "Kasan shadow start" }, + KASAN_START_NR, +#endif +}; + +static struct addr_marker address_markers[] = { + { PAGE_OFFSET, "Linear Mapping start" }, + { 0 /* PAGE_END */, "Linear Mapping end" }, +#ifdef CONFIG_KASAN + { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" }, { KASAN_SHADOW_END, "Kasan shadow end" }, #endif { MODULES_VADDR, "Modules start" }, @@ -42,7 +53,6 @@ static const struct addr_marker address_markers[] = { { VMEMMAP_START, "vmemmap start" }, { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, #endif - { PAGE_OFFSET, "Linear mapping" }, { -1, NULL }, }; @@ -376,7 +386,7 @@ static void ptdump_initialize(void) static struct ptdump_info kernel_ptdump_info = { .mm = &init_mm, .markers = address_markers, - .base_addr = VA_START, + .base_addr = PAGE_OFFSET, }; void ptdump_check_wx(void) @@ -390,7 +400,7 @@ void ptdump_check_wx(void) .check_wx = true, }; - walk_pgd(&st, &init_mm, VA_START); + walk_pgd(&st, &init_mm, PAGE_OFFSET); note_page(&st, 0, 0, 0); if (st.wx_pages || st.uxn_pages) pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", @@ -401,6 +411,10 @@ void ptdump_check_wx(void) static int ptdump_init(void) { + address_markers[PAGE_END_NR].start_address = PAGE_END; +#ifdef CONFIG_KASAN + address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START; +#endif ptdump_initialize(); ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables"); return 0; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index cfd65b63f36f..115d7a0e4b08 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -8,6 +8,7 @@ */ #include <linux/acpi.h> +#include <linux/bitfield.h> 
#include <linux/extable.h> #include <linux/signal.h> #include <linux/mm.h> @@ -86,8 +87,8 @@ static void mem_abort_decode(unsigned int esr) pr_alert("Mem abort info:\n"); pr_alert(" ESR = 0x%08x\n", esr); - pr_alert(" Exception class = %s, IL = %u bits\n", - esr_get_class_string(esr), + pr_alert(" EC = 0x%02lx: %s, IL = %u bits\n", + ESR_ELx_EC(esr), esr_get_class_string(esr), (esr & ESR_ELx_IL) ? 32 : 16); pr_alert(" SET = %lu, FnV = %lu\n", (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT, @@ -109,7 +110,7 @@ static inline bool is_ttbr0_addr(unsigned long addr) static inline bool is_ttbr1_addr(unsigned long addr) { /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ - return arch_kasan_reset_tag(addr) >= VA_START; + return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; } /* @@ -138,10 +139,9 @@ static void show_pte(unsigned long addr) return; } - pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp=%016lx\n", + pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n", mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, - mm == &init_mm ? VA_BITS : (int)vabits_user, - (unsigned long)virt_to_phys(mm->pgd)); + vabits_actual, (unsigned long)virt_to_phys(mm->pgd)); pgdp = pgd_offset(mm, addr); pgd = READ_ONCE(*pgdp); pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd)); @@ -242,6 +242,34 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, return false; } +static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, + unsigned int esr, + struct pt_regs *regs) +{ + unsigned long flags; + u64 par, dfsc; + + if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR || + (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT) + return false; + + local_irq_save(flags); + asm volatile("at s1e1r, %0" :: "r" (addr)); + isb(); + par = read_sysreg(par_el1); + local_irq_restore(flags); + + if (!(par & SYS_PAR_EL1_F)) + return false; + + /* + * If we got a different type of fault from the AT instruction, + * treat the translation fault as spurious. 
+ */ + dfsc = FIELD_PREP(SYS_PAR_EL1_FST, par); + return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT; +} + static void die_kernel_fault(const char *msg, unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -270,6 +298,10 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; + if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs), + "Ignoring spurious kernel translation fault at virtual address %016lx\n", addr)) + return; + if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f3c795278def..45c00a54909c 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -50,6 +50,12 @@ s64 memstart_addr __ro_after_init = -1; EXPORT_SYMBOL(memstart_addr); +s64 physvirt_offset __ro_after_init; +EXPORT_SYMBOL(physvirt_offset); + +struct page *vmemmap __ro_after_init; +EXPORT_SYMBOL(vmemmap); + phys_addr_t arm64_dma_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE @@ -301,7 +307,7 @@ static void __init fdt_enforce_memory_region(void) void __init arm64_memblock_init(void) { - const s64 linear_region_size = -(s64)PAGE_OFFSET; + const s64 linear_region_size = BIT(vabits_actual - 1); /* Handle linux,usable-memory-range property */ fdt_enforce_memory_region(); @@ -310,18 +316,25 @@ void __init arm64_memblock_init(void) memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX); /* - * Ensure that the linear region takes up exactly half of the kernel - * virtual address space. This way, we can distinguish a linear address - * from a kernel/module/vmalloc address by testing a single bit. - */ - BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1)); - - /* * Select a suitable value for the base of physical memory. */ memstart_addr = round_down(memblock_start_of_DRAM(), ARM64_MEMSTART_ALIGN); + physvirt_offset = PHYS_OFFSET - PAGE_OFFSET; + + vmemmap = ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)); + + /* + * If we are running with a 52-bit kernel VA config on a system that + * does not support it, we have to offset our vmemmap and physvirt_offset + * s.t. we avoid the 52-bit portion of the direct linear map + */ + if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52)) { + vmemmap += (_PAGE_OFFSET(48) - _PAGE_OFFSET(52)) >> PAGE_SHIFT; + physvirt_offset = PHYS_OFFSET - _PAGE_OFFSET(48); + } + /* * Remove the memory that we will not be able to cover with the * linear mapping. 
Take care not to clip the kernel which may be @@ -570,8 +583,12 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { + unsigned long aligned_start, aligned_end; + + aligned_start = __virt_to_phys(start) & PAGE_MASK; + aligned_end = PAGE_ALIGN(__virt_to_phys(end)); + memblock_free(aligned_start, aligned_end - aligned_start); free_reserved_area((void *)start, (void *)end, 0, "initrd"); - memblock_free(__virt_to_phys(start), end - start); } #endif diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 6cf97b904ebb..f87a32484ea8 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -156,7 +156,8 @@ asmlinkage void __init kasan_early_init(void) { BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS), PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS_MIN), PGDIR_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true); @@ -225,10 +226,10 @@ void __init kasan_init(void) kasan_map_populate(kimg_shadow_start, kimg_shadow_end, early_pfn_to_nid(virt_to_pfn(lm_alias(_text)))); - kasan_populate_early_shadow((void *)KASAN_SHADOW_START, - (void *)mod_shadow_start); + kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), + (void *)mod_shadow_start); kasan_populate_early_shadow((void *)kimg_shadow_end, - kasan_mem_to_shadow((void *)PAGE_OFFSET)); + (void *)KASAN_SHADOW_END); if (kimg_shadow_start > mod_shadow_end) kasan_populate_early_shadow((void *)mod_shadow_end, diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 750a69dde39b..53dc6f24cfb7 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -40,8 +40,9 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; -u64 vabits_user __ro_after_init; -EXPORT_SYMBOL(vabits_user); + +u64 __section(".mmuoff.data.write") vabits_actual; +EXPORT_SYMBOL(vabits_actual); u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); @@ -398,7 +399,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if (virt < VMALLOC_START) { + if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -425,7 +426,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, static void update_mapping_prot(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if (virt < VMALLOC_START) { + if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -646,6 +647,8 @@ static void __init map_kernel(pgd_t *pgdp) set_pgd(pgd_offset_raw(pgdp, FIXADDR_START), READ_ONCE(*pgd_offset_k(FIXADDR_START))); } else if (CONFIG_PGTABLE_LEVELS > 3) { + pgd_t *bm_pgdp; + pud_t *bm_pudp; /* * The fixmap shares its top level pgd entry with the kernel * mapping. This can really only occur when we are running @@ -653,9 +656,9 @@ static void __init map_kernel(pgd_t *pgdp) * entry instead. 
*/ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - pud_populate(&init_mm, - pud_set_fixmap_offset(pgdp, FIXADDR_START), - lm_alias(bm_pmd)); + bm_pgdp = pgd_offset_raw(pgdp, FIXADDR_START); + bm_pudp = pud_set_fixmap_offset(bm_pgdp, FIXADDR_START); + pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd)); pud_clear_fixmap(); } else { BUG(); @@ -876,7 +879,7 @@ void __set_fixmap(enum fixed_addresses idx, } } -void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) +void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); int offset; @@ -929,19 +932,6 @@ void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) return dt_virt; } -void *__init fixmap_remap_fdt(phys_addr_t dt_phys) -{ - void *dt_virt; - int size; - - dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); - if (!dt_virt) - return NULL; - - memblock_reserve(dt_phys, size); - return dt_virt; -} - int __init arch_ioremap_p4d_supported(void) { return 0; diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 4f241cc7cc3b..4decf1659700 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -29,7 +29,7 @@ static __init int numa_parse_early_param(char *opt) { if (!opt) return -EINVAL; - if (!strncmp(opt, "off", 3)) + if (str_has_prefix(opt, "off")) numa_off = true; return 0; diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 03c53f16ee77..9ce7bd9d4d9c 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -128,7 +128,6 @@ int set_memory_nx(unsigned long addr, int numpages) __pgprot(PTE_PXN), __pgprot(0)); } -EXPORT_SYMBOL_GPL(set_memory_nx); int set_memory_x(unsigned long addr, int numpages) { @@ -136,7 +135,6 @@ int set_memory_x(unsigned long addr, int numpages) __pgprot(0), __pgprot(PTE_PXN)); } -EXPORT_SYMBOL_GPL(set_memory_x); int set_memory_valid(unsigned long addr, int numpages, int enable) { diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 7dbf2be470f6..a1e0592d1fbc 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -168,7 +168,7 @@ ENDPROC(cpu_do_switch_mm) .macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 adrp \tmp1, empty_zero_page phys_to_ttbr \tmp2, \tmp1 - offset_ttbr1 \tmp2 + offset_ttbr1 \tmp2, \tmp1 msr ttbr1_el1, \tmp2 isb tlbi vmalle1 @@ -187,7 +187,7 @@ ENTRY(idmap_cpu_replace_ttbr1) __idmap_cpu_set_reserved_ttbr1 x1, x3 - offset_ttbr1 x0 + offset_ttbr1 x0, x3 msr ttbr1_el1, x0 isb @@ -286,6 +286,15 @@ skip_pgd: msr sctlr_el1, x18 isb + /* + * Invalidate the local I-cache so that any instructions fetched + * speculatively from the PoC are discarded, since they may have + * been dynamically patched at the PoU. 
+ */ + ic iallu + dsb nsh + isb + /* Set the flag to zero to indicate that we're all done */ str wzr, [flag_ptr] ret @@ -362,7 +371,7 @@ __idmap_kpti_secondary: cbnz w18, 1b /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb + offset_ttbr1 swapper_ttb, x18 msr ttbr1_el1, swapper_ttb isb ret @@ -438,10 +447,11 @@ ENTRY(__cpu_setup) TCR_TBI0 | TCR_A1 | TCR_KASAN_FLAGS tcr_clear_errata_bits x10, x9, x5 -#ifdef CONFIG_ARM64_USER_VA_BITS_52 - ldr_l x9, vabits_user +#ifdef CONFIG_ARM64_VA_BITS_52 + ldr_l x9, vabits_actual sub x9, xzr, x9 add x9, x9, #64 + tcr_set_t1sz x10, x9 #else ldr_l x9, idmap_t0sz #endif diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index c345b79414a9..403f7e193833 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -39,13 +39,11 @@ endif uname := $(shell uname -m) KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig -ifdef CONFIG_PPC64 new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi) ifeq ($(new_nm),y) NM := $(NM) --synthetic endif -endif # BITS is used as extension for files which are available in a 32 bit # and a 64 bit version to simplify shared Makefiles. diff --git a/arch/powerpc/include/asm/error-injection.h b/arch/powerpc/include/asm/error-injection.h deleted file mode 100644 index 62fd24739852..000000000000 --- a/arch/powerpc/include/asm/error-injection.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0+ */ - -#ifndef _ASM_ERROR_INJECTION_H -#define _ASM_ERROR_INJECTION_H - -#include <linux/compiler.h> -#include <linux/linkage.h> -#include <asm/ptrace.h> -#include <asm-generic/error-injection.h> - -void override_function_with_return(struct pt_regs *regs); - -#endif /* _ASM_ERROR_INJECTION_H */ diff --git a/arch/x86/include/asm/error-injection.h b/arch/x86/include/asm/error-injection.h deleted file mode 100644 index 47b7a1296245..000000000000 --- a/arch/x86/include/asm/error-injection.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_ERROR_INJECTION_H -#define _ASM_ERROR_INJECTION_H - -#include <linux/compiler.h> -#include <linux/linkage.h> -#include <asm/ptrace.h> -#include <asm-generic/error-injection.h> - -asmlinkage void just_return_func(void); -void override_function_with_return(struct pt_regs *regs); - -#endif /* _ASM_ERROR_INJECTION_H */ diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 8569b79e8b58..5a7551d060f2 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1256,12 +1256,12 @@ static int __init arm_smmu_v3_set_proximity(struct device *dev, smmu = (struct acpi_iort_smmu_v3 *)node->node_data; if (smmu->flags & ACPI_IORT_SMMU_V3_PXM_VALID) { - int node = acpi_map_pxm_to_node(smmu->pxm); + int dev_node = acpi_map_pxm_to_node(smmu->pxm); - if (node != NUMA_NO_NODE && !node_online(node)) + if (dev_node != NUMA_NO_NODE && !node_online(dev_node)) return -EINVAL; - set_dev_node(dev, node); + set_dev_node(dev, dev_node); pr_info("SMMU-v3[%llx] Mapped to Proximity domain %d\n", smmu->base_address, smmu->pxm); diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 3e866885a405..2794f4b3f62d 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -573,3 +573,12 @@ config RANDOM_TRUST_CPU has not installed a hidden back door to compromise the CPU's random number generation facilities. This can also be configured at boot with "random.trust_cpu=on/off". 
+ +config RANDOM_TRUST_BOOTLOADER + bool "Trust the bootloader to initialize Linux's CRNG" + help + Some bootloaders can provide entropy to increase the kernel's initial + device randomness. Say Y here to assume the entropy provided by the + bootloader is trustworthy so it will be added to the kernel's entropy + pool. Otherwise, say N here so it will be regarded as device input that + is only mixed into the entropy pool.
\ No newline at end of file diff --git a/drivers/char/random.c index 5d5ea4ce1442..566922df4b7b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2445,3 +2445,17 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, credit_entropy_bits(poolp, entropy); } EXPORT_SYMBOL_GPL(add_hwgenerator_randomness); + +/* Handle a random seed passed in by the bootloader. + * If the seed is trustworthy, it is credited as input from a hardware RNG. + * Otherwise it is only mixed in as device data. + * The decision is controlled by CONFIG_RANDOM_TRUST_BOOTLOADER. + */ +void add_bootloader_randomness(const void *buf, unsigned int size) +{ + if (IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER)) + add_hwgenerator_randomness(buf, size, size * 8); + else + add_device_randomness(buf, size); +} +EXPORT_SYMBOL_GPL(add_bootloader_randomness);
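For illustration, a minimal sketch of a hypothetical early-boot caller of the new helper; the function name below is invented, and the real in-tree consumer is the of/fdt "rng-seed" handling later in this series:

    #include <linux/init.h>
    #include <linux/random.h>

    /*
     * Hypothetical example only: hand a firmware-provided seed to the RNG
     * core. Whether the bytes are credited as entropy (hardware-RNG path)
     * or merely mixed in (device-data path) is decided inside
     * add_bootloader_randomness() by CONFIG_RANDOM_TRUST_BOOTLOADER.
     */
    static void __init example_feed_bootloader_seed(const void *seed,
    						    unsigned int len)
    {
    	if (seed && len)
    		add_bootloader_randomness(seed, len);
    }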
\ No newline at end of file diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm index 48cb3d4bb7d1..d8530475493c 100644 --- a/drivers/cpuidle/Kconfig.arm +++ b/drivers/cpuidle/Kconfig.arm @@ -13,6 +13,16 @@ config ARM_CPUIDLE initialized by calling the CPU operations init idle hook provided by architecture code. +config ARM_PSCI_CPUIDLE + bool "PSCI CPU idle Driver" + depends on ARM_PSCI_FW + select DT_IDLE_STATES + select CPU_IDLE_MULTIPLE_DRIVERS + help + Select this to enable PSCI firmware based CPUidle driver for ARM. + It provides an idle driver that is capable of detecting and + managing idle states through the PSCI firmware interface. + config ARM_BIG_LITTLE_CPUIDLE bool "Support for ARM big.LITTLE processors" depends on ARCH_VEXPRESS_TC2_PM || ARCH_EXYNOS diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 9d7176cee3d3..40d016339b29 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_ARM_U8500_CPUIDLE) += cpuidle-ux500.o obj-$(CONFIG_ARM_AT91_CPUIDLE) += cpuidle-at91.o obj-$(CONFIG_ARM_EXYNOS_CPUIDLE) += cpuidle-exynos.o obj-$(CONFIG_ARM_CPUIDLE) += cpuidle-arm.o +obj-$(CONFIG_ARM_PSCI_CPUIDLE) += cpuidle-psci.o ############################################################################### # MIPS drivers diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c index 5bcd82c35dcf..9e5156d39627 100644 --- a/drivers/cpuidle/cpuidle-arm.c +++ b/drivers/cpuidle/cpuidle-arm.c @@ -15,7 +15,6 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/slab.h> -#include <linux/topology.h> #include <asm/cpuidle.h> @@ -106,11 +105,17 @@ static int __init arm_idle_init_cpu(int cpu) ret = arm_cpuidle_init(cpu); /* - * Allow the initialization to continue for other CPUs, if the reported - * failure is a HW misconfiguration/breakage (-ENXIO). + * Allow the initialization to continue for other CPUs, if the + * reported failure is a HW misconfiguration/breakage (-ENXIO). + * + * Some platforms do not support idle operations + * (arm_cpuidle_init() returning -EOPNOTSUPP), we should + * not flag this case as an error, it is a valid + * configuration. */ if (ret) { - pr_err("CPU %d failed to init idle CPU ops\n", cpu); + if (ret != -EOPNOTSUPP) + pr_err("CPU %d failed to init idle CPU ops\n", cpu); ret = ret == -ENXIO ? 0 : ret; goto out_kfree_drv; } diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c new file mode 100644 index 000000000000..f3c1a2396f98 --- /dev/null +++ b/drivers/cpuidle/cpuidle-psci.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PSCI CPU idle driver. + * + * Copyright (C) 2019 ARM Ltd. 
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> + */ + +#define pr_fmt(fmt) "CPUidle PSCI: " fmt + +#include <linux/cpuidle.h> +#include <linux/cpumask.h> +#include <linux/cpu_pm.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/psci.h> +#include <linux/slab.h> + +#include <asm/cpuidle.h> + +#include "dt_idle_states.h" + +static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); + +static int psci_enter_idle_state(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int idx) +{ + u32 *state = __this_cpu_read(psci_power_state); + + return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, + idx, state[idx - 1]); +} + +static struct cpuidle_driver psci_idle_driver __initdata = { + .name = "psci_idle", + .owner = THIS_MODULE, + /* + * PSCI idle states relies on architectural WFI to + * be represented as state index 0. + */ + .states[0] = { + .enter = psci_enter_idle_state, + .exit_latency = 1, + .target_residency = 1, + .power_usage = UINT_MAX, + .name = "WFI", + .desc = "ARM WFI", + } +}; + +static const struct of_device_id psci_idle_state_match[] __initconst = { + { .compatible = "arm,idle-state", + .data = psci_enter_idle_state }, + { }, +}; + +static int __init psci_dt_parse_state_node(struct device_node *np, u32 *state) +{ + int err = of_property_read_u32(np, "arm,psci-suspend-param", state); + + if (err) { + pr_warn("%pOF missing arm,psci-suspend-param property\n", np); + return err; + } + + if (!psci_power_state_is_valid(*state)) { + pr_warn("Invalid PSCI power state %#x\n", *state); + return -EINVAL; + } + + return 0; +} + +static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) +{ + int i, ret = 0, count = 0; + u32 *psci_states; + struct device_node *state_node; + + /* Count idle states */ + while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", + count))) { + count++; + of_node_put(state_node); + } + + if (!count) + return -ENODEV; + + psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); + if (!psci_states) + return -ENOMEM; + + for (i = 0; i < count; i++) { + state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); + ret = psci_dt_parse_state_node(state_node, &psci_states[i]); + of_node_put(state_node); + + if (ret) + goto free_mem; + + pr_debug("psci-power-state %#x index %d\n", psci_states[i], i); + } + + /* Idle states parsed correctly, initialize per-cpu pointer */ + per_cpu(psci_power_state, cpu) = psci_states; + return 0; + +free_mem: + kfree(psci_states); + return ret; +} + +static __init int psci_cpu_init_idle(unsigned int cpu) +{ + struct device_node *cpu_node; + int ret; + + /* + * If the PSCI cpu_suspend function hook has not been initialized + * idle states must not be enabled, so bail out + */ + if (!psci_ops.cpu_suspend) + return -EOPNOTSUPP; + + cpu_node = of_cpu_device_node_get(cpu); + if (!cpu_node) + return -ENODEV; + + ret = psci_dt_cpu_init_idle(cpu_node, cpu); + + of_node_put(cpu_node); + + return ret; +} + +static int __init psci_idle_init_cpu(int cpu) +{ + struct cpuidle_driver *drv; + struct device_node *cpu_node; + const char *enable_method; + int ret = 0; + + cpu_node = of_cpu_device_node_get(cpu); + if (!cpu_node) + return -ENODEV; + + /* + * Check whether the enable-method for the cpu is PSCI, fail + * if it is not. 
+ */ + enable_method = of_get_property(cpu_node, "enable-method", NULL); + if (!enable_method || (strcmp(enable_method, "psci"))) + ret = -ENODEV; + + of_node_put(cpu_node); + if (ret) + return ret; + + drv = kmemdup(&psci_idle_driver, sizeof(*drv), GFP_KERNEL); + if (!drv) + return -ENOMEM; + + drv->cpumask = (struct cpumask *)cpumask_of(cpu); + + /* + * Initialize idle states data, starting at index 1, since + * by default idle state 0 is the quiescent state reached + * by the cpu by executing the wfi instruction. + * + * If no DT idle states are detected (ret == 0) let the driver + * initialization fail accordingly since there is no reason to + * initialize the idle driver if only wfi is supported, the + * default architectural back-end already executes wfi + * on idle entry. + */ + ret = dt_init_idle_driver(drv, psci_idle_state_match, 1); + if (ret <= 0) { + ret = ret ? : -ENODEV; + goto out_kfree_drv; + } + + /* + * Initialize PSCI idle states. + */ + ret = psci_cpu_init_idle(cpu); + if (ret) { + pr_err("CPU %d failed to initialize PSCI idle states\n", cpu); + goto out_kfree_drv; + } + + ret = cpuidle_register(drv, NULL); + if (ret) + goto out_kfree_drv; + + return 0; + +out_kfree_drv: + kfree(drv); + return ret; +} + +/* + * psci_idle_init - Initializes the PSCI cpuidle driver + * + * Initializes the PSCI cpuidle driver for all CPUs; if any CPU fails + * to register its cpuidle driver, roll back and unregister the CPUs + * that were already registered. + */ +static int __init psci_idle_init(void) +{ + int cpu, ret; + struct cpuidle_driver *drv; + struct cpuidle_device *dev; + + for_each_possible_cpu(cpu) { + ret = psci_idle_init_cpu(cpu); + if (ret) + goto out_fail; + } + + return 0; + +out_fail: + while (--cpu >= 0) { + dev = per_cpu(cpuidle_devices, cpu); + drv = cpuidle_get_cpu_driver(dev); + cpuidle_unregister(drv); + kfree(drv); + } + + return ret; +} +device_initcall(psci_idle_init); diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index f82ccd39a913..84f4ff351c62 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -103,7 +103,7 @@ static inline bool psci_power_state_loses_context(u32 state) return state & mask; } -static inline bool psci_power_state_is_valid(u32 state) +bool psci_power_state_is_valid(u32 state) { const u32 valid_mask = psci_has_ext_power_state() ?
PSCI_1_0_EXT_POWER_STATE_MASK : @@ -277,175 +277,24 @@ static int __init psci_features(u32 psci_func_id) } #ifdef CONFIG_CPU_IDLE -static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); - -static int psci_dt_parse_state_node(struct device_node *np, u32 *state) -{ - int err = of_property_read_u32(np, "arm,psci-suspend-param", state); - - if (err) { - pr_warn("%pOF missing arm,psci-suspend-param property\n", np); - return err; - } - - if (!psci_power_state_is_valid(*state)) { - pr_warn("Invalid PSCI power state %#x\n", *state); - return -EINVAL; - } - - return 0; -} - -static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu) -{ - int i, ret = 0, count = 0; - u32 *psci_states; - struct device_node *state_node; - - /* Count idle states */ - while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states", - count))) { - count++; - of_node_put(state_node); - } - - if (!count) - return -ENODEV; - - psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); - if (!psci_states) - return -ENOMEM; - - for (i = 0; i < count; i++) { - state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i); - ret = psci_dt_parse_state_node(state_node, &psci_states[i]); - of_node_put(state_node); - - if (ret) - goto free_mem; - - pr_debug("psci-power-state %#x index %d\n", psci_states[i], i); - } - - /* Idle states parsed correctly, initialize per-cpu pointer */ - per_cpu(psci_power_state, cpu) = psci_states; - return 0; - -free_mem: - kfree(psci_states); - return ret; -} - -#ifdef CONFIG_ACPI -#include <acpi/processor.h> - -static int __maybe_unused psci_acpi_cpu_init_idle(unsigned int cpu) -{ - int i, count; - u32 *psci_states; - struct acpi_lpi_state *lpi; - struct acpi_processor *pr = per_cpu(processors, cpu); - - if (unlikely(!pr || !pr->flags.has_lpi)) - return -EINVAL; - - count = pr->power.count - 1; - if (count <= 0) - return -ENODEV; - - psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL); - if (!psci_states) - return -ENOMEM; - - for (i = 0; i < count; i++) { - u32 state; - - lpi = &pr->power.lpi_states[i + 1]; - /* - * Only bits[31:0] represent a PSCI power_state while - * bits[63:32] must be 0x0 as per ARM ACPI FFH Specification - */ - state = lpi->address; - if (!psci_power_state_is_valid(state)) { - pr_warn("Invalid PSCI power state %#x\n", state); - kfree(psci_states); - return -EINVAL; - } - psci_states[i] = state; - } - /* Idle states parsed correctly, initialize per-cpu pointer */ - per_cpu(psci_power_state, cpu) = psci_states; - return 0; -} -#else -static int __maybe_unused psci_acpi_cpu_init_idle(unsigned int cpu) -{ - return -EINVAL; -} -#endif - -int psci_cpu_init_idle(unsigned int cpu) -{ - struct device_node *cpu_node; - int ret; - - /* - * If the PSCI cpu_suspend function hook has not been initialized - * idle states must not be enabled, so bail out - */ - if (!psci_ops.cpu_suspend) - return -EOPNOTSUPP; - - if (!acpi_disabled) - return psci_acpi_cpu_init_idle(cpu); - - cpu_node = of_get_cpu_node(cpu, NULL); - if (!cpu_node) - return -ENODEV; - - ret = psci_dt_cpu_init_idle(cpu_node, cpu); - - of_node_put(cpu_node); - - return ret; -} - -static int psci_suspend_finisher(unsigned long index) +static int psci_suspend_finisher(unsigned long state) { - u32 *state = __this_cpu_read(psci_power_state); + u32 power_state = state; - return psci_ops.cpu_suspend(state[index - 1], - __pa_symbol(cpu_resume)); + return psci_ops.cpu_suspend(power_state, __pa_symbol(cpu_resume)); } -int psci_cpu_suspend_enter(unsigned long index) +int psci_cpu_suspend_enter(u32 
state) { int ret; - u32 *state = __this_cpu_read(psci_power_state); - /* - * idle state index 0 corresponds to wfi, should never be called - * from the cpu_suspend operations - */ - if (WARN_ON_ONCE(!index)) - return -EINVAL; - if (!psci_power_state_loses_context(state[index - 1])) - ret = psci_ops.cpu_suspend(state[index - 1], 0); + if (!psci_power_state_loses_context(state)) + ret = psci_ops.cpu_suspend(state, 0); else - ret = cpu_suspend(index, psci_suspend_finisher); + ret = cpu_suspend(state, psci_suspend_finisher); return ret; } - -/* ARM specific CPU idle operations */ -#ifdef CONFIG_ARM -static const struct cpuidle_ops psci_cpuidle_ops __initconst = { - .suspend = psci_cpu_suspend_enter, - .init = psci_dt_cpu_init_idle, -}; - -CPUIDLE_METHOD_OF_DECLARE(psci, "psci", &psci_cpuidle_ops); -#endif #endif static int psci_system_suspend(unsigned long unused) diff --git a/drivers/firmware/psci/psci_checker.c b/drivers/firmware/psci/psci_checker.c index f3659443f8c2..6a445397771c 100644 --- a/drivers/firmware/psci/psci_checker.c +++ b/drivers/firmware/psci/psci_checker.c @@ -228,8 +228,11 @@ out_free_cpus: static void dummy_callback(struct timer_list *unused) {} -static int suspend_cpu(int index, bool broadcast) +static int suspend_cpu(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { + struct cpuidle_state *state = &drv->states[index]; + bool broadcast = state->flags & CPUIDLE_FLAG_TIMER_STOP; int ret; arch_cpu_idle_enter(); @@ -254,11 +257,7 @@ static int suspend_cpu(int index, bool broadcast) } } - /* - * Replicate the common ARM cpuidle enter function - * (arm_enter_idle_state). - */ - ret = CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, index); + ret = state->enter(dev, drv, index); if (broadcast) tick_broadcast_exit(); @@ -301,9 +300,8 @@ static int suspend_test_thread(void *arg) * doesn't use PSCI). */ for (index = 1; index < drv->state_count; ++index) { - struct cpuidle_state *state = &drv->states[index]; - bool broadcast = state->flags & CPUIDLE_FLAG_TIMER_STOP; int ret; + struct cpuidle_state *state = &drv->states[index]; /* * Set the timer to wake this CPU up in some time (which @@ -318,7 +316,7 @@ static int suspend_test_thread(void *arg) /* IRQs must be disabled during suspend operations. */ local_irq_disable(); - ret = suspend_cpu(index, broadcast); + ret = suspend_cpu(dev, drv, index); /* * We have woken up. Re-enable IRQs to handle any diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 9cdf14b9aaab..223d617ecfe1 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -24,6 +24,7 @@ #include <linux/debugfs.h> #include <linux/serial_core.h> #include <linux/sysfs.h> +#include <linux/random.h> #include <asm/setup.h> /* for COMMAND_LINE_SIZE */ #include <asm/page.h> @@ -1044,6 +1045,7 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, { int l; const char *p; + const void *rng_seed; pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname); @@ -1078,6 +1080,18 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, pr_debug("Command line is: %s\n", (char*)data); + rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l); + if (rng_seed && l > 0) { + add_bootloader_randomness(rng_seed, l); + + /* try to clear seed so it won't be found. 
*/ + fdt_nop_property(initial_boot_params, node, "rng-seed"); + + /* update CRC check value */ + of_fdt_crc32 = crc32_be(~0, initial_boot_params, + fdt_totalsize(initial_boot_params)); + } + /* break now */ return 1; } diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index da71c741cb46..abcf54f7d19c 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -113,8 +113,6 @@ struct smmu_pmu { u64 counter_mask; u32 options; bool global_filter; - u32 global_filter_span; - u32 global_filter_sid; }; #define to_smmu_pmu(p) (container_of(p, struct smmu_pmu, pmu)) @@ -260,6 +258,19 @@ static void smmu_pmu_set_event_filter(struct perf_event *event, smmu_pmu_set_smr(smmu_pmu, idx, sid); } +static bool smmu_pmu_check_global_filter(struct perf_event *curr, + struct perf_event *new) +{ + if (get_filter_enable(new) != get_filter_enable(curr)) + return false; + + if (!get_filter_enable(new)) + return true; + + return get_filter_span(new) == get_filter_span(curr) && + get_filter_stream_id(new) == get_filter_stream_id(curr); +} + static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu, struct perf_event *event, int idx) { @@ -279,17 +290,14 @@ static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu, } /* Requested settings same as current global settings*/ - if (span == smmu_pmu->global_filter_span && - sid == smmu_pmu->global_filter_sid) + idx = find_first_bit(smmu_pmu->used_counters, num_ctrs); + if (idx == num_ctrs || + smmu_pmu_check_global_filter(smmu_pmu->events[idx], event)) { + smmu_pmu_set_event_filter(event, 0, span, sid); return 0; + } - if (!bitmap_empty(smmu_pmu->used_counters, num_ctrs)) - return -EAGAIN; - - smmu_pmu_set_event_filter(event, 0, span, sid); - smmu_pmu->global_filter_span = span; - smmu_pmu->global_filter_sid = sid; - return 0; + return -EAGAIN; } static int smmu_pmu_get_event_idx(struct smmu_pmu *smmu_pmu, @@ -312,6 +320,19 @@ static int smmu_pmu_get_event_idx(struct smmu_pmu *smmu_pmu, return idx; } +static bool smmu_pmu_events_compatible(struct perf_event *curr, + struct perf_event *new) +{ + if (new->pmu != curr->pmu) + return false; + + if (to_smmu_pmu(new->pmu)->global_filter && + !smmu_pmu_check_global_filter(curr, new)) + return false; + + return true; +} + /* * Implementation of abstract pmu functionality required by * the core perf events code. 
@@ -323,6 +344,7 @@ static int smmu_pmu_event_init(struct perf_event *event) struct smmu_pmu *smmu_pmu = to_smmu_pmu(event->pmu); struct device *dev = smmu_pmu->dev; struct perf_event *sibling; + int group_num_events = 1; u16 event_id; if (event->attr.type != event->pmu->type) @@ -347,18 +369,23 @@ static int smmu_pmu_event_init(struct perf_event *event) } /* Don't allow groups with mixed PMUs, except for s/w events */ - if (event->group_leader->pmu != event->pmu && - !is_software_event(event->group_leader)) { - dev_dbg(dev, "Can't create mixed PMU group\n"); - return -EINVAL; + if (!is_software_event(event->group_leader)) { + if (!smmu_pmu_events_compatible(event->group_leader, event)) + return -EINVAL; + + if (++group_num_events > smmu_pmu->num_counters) + return -EINVAL; } for_each_sibling_event(sibling, event->group_leader) { - if (sibling->pmu != event->pmu && - !is_software_event(sibling)) { - dev_dbg(dev, "Can't create mixed PMU group\n"); + if (is_software_event(sibling)) + continue; + + if (!smmu_pmu_events_compatible(sibling, event)) + return -EINVAL; + + if (++group_num_events > smmu_pmu->num_counters) return -EINVAL; - } } hwc->idx = -1; diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 63fe21600072..ce7345745b42 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -35,6 +35,8 @@ #define EVENT_CYCLES_COUNTER 0 #define NUM_COUNTERS 4 +#define AXI_MASKING_REVERT 0xffff0000 /* AXI_MASKING(MSB 16bits) + AXI_ID(LSB 16bits) */ + #define to_ddr_pmu(p) container_of(p, struct ddr_pmu, pmu) #define DDR_PERF_DEV_NAME "imx8_ddr" @@ -42,11 +44,25 @@ static DEFINE_IDA(ddr_ida); +/* DDR Perf hardware feature */ +#define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */ + +struct fsl_ddr_devtype_data { + unsigned int quirks; /* quirks needed for different DDR Perf core */ +}; + +static const struct fsl_ddr_devtype_data imx8_devtype_data; + +static const struct fsl_ddr_devtype_data imx8m_devtype_data = { + .quirks = DDR_CAP_AXI_ID_FILTER, +}; + static const struct of_device_id imx_ddr_pmu_dt_ids[] = { - { .compatible = "fsl,imx8-ddr-pmu",}, - { .compatible = "fsl,imx8m-ddr-pmu",}, + { .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data}, + { .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data}, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids); struct ddr_pmu { struct pmu pmu; @@ -57,6 +73,7 @@ struct ddr_pmu { struct perf_event *events[NUM_COUNTERS]; int active_events; enum cpuhp_state cpuhp_state; + const struct fsl_ddr_devtype_data *devtype_data; int irq; int id; }; @@ -128,6 +145,8 @@ static struct attribute *ddr_perf_events_attrs[] = { IMX8_DDR_PMU_EVENT_ATTR(refresh, 0x37), IMX8_DDR_PMU_EVENT_ATTR(write, 0x38), IMX8_DDR_PMU_EVENT_ATTR(raw-hazard, 0x39), + IMX8_DDR_PMU_EVENT_ATTR(axid-read, 0x41), + IMX8_DDR_PMU_EVENT_ATTR(axid-write, 0x42), NULL, }; @@ -137,9 +156,13 @@ static struct attribute_group ddr_perf_events_attr_group = { }; PMU_FORMAT_ATTR(event, "config:0-7"); +PMU_FORMAT_ATTR(axi_id, "config1:0-15"); +PMU_FORMAT_ATTR(axi_mask, "config1:16-31"); static struct attribute *ddr_perf_format_attrs[] = { &format_attr_event.attr, + &format_attr_axi_id.attr, + &format_attr_axi_mask.attr, NULL, }; @@ -189,6 +212,26 @@ static u32 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter) return readl_relaxed(pmu->base + COUNTER_READ + counter * 4); } +static bool ddr_perf_is_filtered(struct perf_event *event) +{ + return event->attr.config == 0x41 || event->attr.config == 0x42; +} 
+ +static u32 ddr_perf_filter_val(struct perf_event *event) +{ + return event->attr.config1; +} + +static bool ddr_perf_filters_compatible(struct perf_event *a, + struct perf_event *b) +{ + if (!ddr_perf_is_filtered(a)) + return true; + if (!ddr_perf_is_filtered(b)) + return true; + return ddr_perf_filter_val(a) == ddr_perf_filter_val(b); +} + static int ddr_perf_event_init(struct perf_event *event) { struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); @@ -215,6 +258,15 @@ static int ddr_perf_event_init(struct perf_event *event) !is_software_event(event->group_leader)) return -EINVAL; + if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) { + if (!ddr_perf_filters_compatible(event, event->group_leader)) + return -EINVAL; + for_each_sibling_event(sibling, event->group_leader) { + if (!ddr_perf_filters_compatible(event, sibling)) + return -EINVAL; + } + } + for_each_sibling_event(sibling, event->group_leader) { if (sibling->pmu != event->pmu && !is_software_event(sibling)) @@ -287,6 +339,23 @@ static int ddr_perf_event_add(struct perf_event *event, int flags) struct hw_perf_event *hwc = &event->hw; int counter; int cfg = event->attr.config; + int cfg1 = event->attr.config1; + + if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER) { + int i; + + for (i = 1; i < NUM_COUNTERS; i++) { + if (pmu->events[i] && + !ddr_perf_filters_compatible(event, pmu->events[i])) + return -EINVAL; + } + + if (ddr_perf_is_filtered(event)) { + /* revert axi id masking(axi_mask) value */ + cfg1 ^= AXI_MASKING_REVERT; + writel(cfg1, pmu->base + COUNTER_DPCR1); + } + } counter = ddr_perf_alloc_counter(pmu, cfg); if (counter < 0) { @@ -472,6 +541,8 @@ static int ddr_perf_probe(struct platform_device *pdev) if (!name) return -ENOMEM; + pmu->devtype_data = of_device_get_match_data(&pdev->dev); + pmu->cpu = raw_smp_processor_id(); ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DDR_CPUHP_CB_NAME, diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index 6ad0823bcf23..e42d4464c2cf 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -217,10 +217,8 @@ static int hisi_ddrc_pmu_init_irq(struct hisi_pmu *ddrc_pmu, /* Read and init IRQ */ irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "DDRC PMU get irq fail; irq:%d\n", irq); + if (irq < 0) return irq; - } ret = devm_request_irq(&pdev->dev, irq, hisi_ddrc_pmu_isr, IRQF_NOBALANCING | IRQF_NO_THREAD, diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index 4f2917f3e25e..f28063873e11 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -207,10 +207,8 @@ static int hisi_hha_pmu_init_irq(struct hisi_pmu *hha_pmu, /* Read and init IRQ */ irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "HHA PMU get irq fail; irq:%d\n", irq); + if (irq < 0) return irq; - } ret = devm_request_irq(&pdev->dev, irq, hisi_hha_pmu_isr, IRQF_NOBALANCING | IRQF_NO_THREAD, diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 9153e093f9df..078b8dc57250 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -206,10 +206,8 @@ static int hisi_l3c_pmu_init_irq(struct hisi_pmu *l3c_pmu, /* Read and init IRQ */ irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "L3C PMU get irq fail; irq:%d\n", irq); + if (irq < 
0) return irq; - } ret = devm_request_irq(&pdev->dev, irq, hisi_l3c_pmu_isr, IRQF_NOBALANCING | IRQF_NO_THREAD, diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c index d06182fe14b8..21d6991dbe0b 100644 --- a/drivers/perf/qcom_l2_pmu.c +++ b/drivers/perf/qcom_l2_pmu.c @@ -909,12 +909,8 @@ static int l2_cache_pmu_probe_cluster(struct device *dev, void *data) cluster->cluster_id = fw_cluster_id; irq = platform_get_irq(sdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, - "Failed to get valid irq for cluster %ld\n", - fw_cluster_id); + if (irq < 0) return irq; - } irq_set_status_flags(irq, IRQ_NOAUTOEN); cluster->irq = irq; diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 3259e2ebeb39..7e328d6385c3 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1901,10 +1901,8 @@ static int xgene_pmu_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); - if (irq < 0) { - dev_err(&pdev->dev, "No IRQ resource\n"); + if (irq < 0) return -EINVAL; - } rc = devm_request_irq(&pdev->dev, irq, xgene_pmu_isr, IRQF_NOBALANCING | IRQF_NO_THREAD, diff --git a/include/asm-generic/error-injection.h b/include/asm-generic/error-injection.h index 95a159a4137f..80ca61058dd2 100644 --- a/include/asm-generic/error-injection.h +++ b/include/asm-generic/error-injection.h @@ -16,6 +16,8 @@ struct error_injection_entry { int etype; }; +struct pt_regs; + #ifdef CONFIG_FUNCTION_ERROR_INJECTION /* * Whitelist ganerating macro. Specify functions which can be @@ -28,8 +30,12 @@ static struct error_injection_entry __used \ .addr = (unsigned long)fname, \ .etype = EI_ETYPE_##_etype, \ }; + +void override_function_with_return(struct pt_regs *regs); #else #define ALLOW_ERROR_INJECTION(fname, _etype) + +static inline void override_function_with_return(struct pt_regs *regs) { } #endif #endif diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index bb9a0db89f1a..12ae4b87494e 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -256,7 +256,10 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov) {return 0;} #endif -#define __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, is_retention) \ +#define __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, \ + idx, \ + state, \ + is_retention) \ ({ \ int __ret = 0; \ \ @@ -268,7 +271,7 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov) if (!is_retention) \ __ret = cpu_pm_enter(); \ if (!__ret) { \ - __ret = low_level_idle_enter(idx); \ + __ret = low_level_idle_enter(state); \ if (!is_retention) \ cpu_pm_exit(); \ } \ @@ -277,9 +280,15 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov) }) #define CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx) \ - __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, 0) + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 0) #define CPU_PM_CPU_IDLE_ENTER_RETENTION(low_level_idle_enter, idx) \ - __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, 1) + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, idx, 1) + +#define CPU_PM_CPU_IDLE_ENTER_PARAM(low_level_idle_enter, idx, state) \ + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 0) + +#define CPU_PM_CPU_IDLE_ENTER_RETENTION_PARAM(low_level_idle_enter, idx, state) \ + __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, state, 1) #endif /* _LINUX_CPUIDLE_H */ diff --git a/include/linux/error-injection.h b/include/linux/error-injection.h index 280c61ecbf20..635a95caf29f 100644 --- a/include/linux/error-injection.h +++ 
b/include/linux/error-injection.h @@ -2,16 +2,16 @@ #ifndef _LINUX_ERROR_INJECTION_H #define _LINUX_ERROR_INJECTION_H -#ifdef CONFIG_FUNCTION_ERROR_INJECTION +#include <linux/compiler.h> +#include <asm-generic/error-injection.h> -#include <asm/error-injection.h> +#ifdef CONFIG_FUNCTION_ERROR_INJECTION extern bool within_error_injection_list(unsigned long addr); extern int get_injectable_error_type(unsigned long addr); #else /* !CONFIG_FUNCTION_ERROR_INJECTION */ -#include <asm-generic/error-injection.h> static inline bool within_error_injection_list(unsigned long addr) { return false; diff --git a/include/linux/psci.h b/include/linux/psci.h index a8a15613c157..e2bacc6fd2f2 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -15,8 +15,8 @@ bool psci_tos_resident_on(int cpu); -int psci_cpu_init_idle(unsigned int cpu); -int psci_cpu_suspend_enter(unsigned long index); +int psci_cpu_suspend_enter(u32 state); +bool psci_power_state_is_valid(u32 state); enum psci_conduit { PSCI_CONDUIT_NONE, diff --git a/include/linux/random.h b/include/linux/random.h index 1f7dced2bba6..f189c927fdea 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -19,6 +19,7 @@ struct random_ready_callback { }; extern void add_device_randomness(const void *, unsigned int); +extern void add_bootloader_randomness(const void *, unsigned int); #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__) static inline void add_latent_entropy(void) diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 094bb03b9cc2..2e927b3e9d6c 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -229,4 +229,9 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) +/* Tagged user address controls for arm64 */ +#define PR_SET_TAGGED_ADDR_CTRL 55 +#define PR_GET_TAGGED_ADDR_CTRL 56 +# define PR_TAGGED_ADDR_ENABLE (1UL << 0) + #endif /* _LINUX_PRCTL_H */ diff --git a/init/Kconfig b/init/Kconfig index bd7d650d4a99..d96127ebc44e 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -30,6 +30,9 @@ config CC_CAN_LINK config CC_HAS_ASM_GOTO def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC)) +config TOOLS_SUPPORT_RELR + def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh) + config CC_HAS_WARN_MAYBE_UNINITIALIZED def_bool $(cc-option,-Wmaybe-uninitialized) help diff --git a/kernel/sys.c b/kernel/sys.c index 2969304c29fe..ec48396b4943 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -124,6 +124,12 @@ #ifndef PAC_RESET_KEYS # define PAC_RESET_KEYS(a, b) (-EINVAL) #endif +#ifndef SET_TAGGED_ADDR_CTRL +# define SET_TAGGED_ADDR_CTRL(a) (-EINVAL) +#endif +#ifndef GET_TAGGED_ADDR_CTRL +# define GET_TAGGED_ADDR_CTRL() (-EINVAL) +#endif /* * this is where the system-wide overflow UID and GID are defined, for @@ -2492,6 +2498,16 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; error = PAC_RESET_KEYS(me, arg2); break; + case PR_SET_TAGGED_ADDR_CTRL: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = SET_TAGGED_ADDR_CTRL(arg2); + break; + case PR_GET_TAGGED_ADDR_CTRL: + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; + error = GET_TAGGED_ADDR_CTRL(); + break; default: error = -EINVAL; break; diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 6410bd22fe38..03757cc60e06 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -1,4 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 +ifdef CONFIG_KASAN 
+CFLAGS_KASAN_NOSANITIZE := -fno-builtin +KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET) +endif + ifdef CONFIG_KASAN_GENERIC ifdef CONFIG_KASAN_INLINE @@ -7,8 +12,6 @@ else call_threshold := 0 endif -KASAN_SHADOW_OFFSET ?= $(CONFIG_KASAN_SHADOW_OFFSET) - CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address cc-param = $(call cc-option, -mllvm -$(1), $(call cc-option, --param $(1))) @@ -45,7 +48,3 @@ CFLAGS_KASAN := -fsanitize=kernel-hwaddress \ $(instrumentation_flags) endif # CONFIG_KASAN_SW_TAGS - -ifdef CONFIG_KASAN -CFLAGS_KASAN_NOSANITIZE := -fno-builtin -endif diff --git a/scripts/tools-support-relr.sh b/scripts/tools-support-relr.sh new file mode 100755 index 000000000000..97a2c844a95e --- /dev/null +++ b/scripts/tools-support-relr.sh @@ -0,0 +1,16 @@ +#!/bin/sh -eu +# SPDX-License-Identifier: GPL-2.0 + +tmp_file=$(mktemp) +trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT + +cat << "END" | "$CC" -c -x c - -o $tmp_file.o >/dev/null 2>&1 +void *p = &p; +END +"$LD" $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file + +# Despite printing an error message, GNU nm still exits with exit code 0 if it +# sees a relr section. So we need to check that nothing is printed to stderr. +test -z "$("$NM" $tmp_file 2>&1 >/dev/null)" + +"$OBJCOPY" -O binary $tmp_file $tmp_file.bin diff --git a/tools/testing/selftests/arm64/.gitignore b/tools/testing/selftests/arm64/.gitignore new file mode 100644 index 000000000000..e8fae8d61ed6 --- /dev/null +++ b/tools/testing/selftests/arm64/.gitignore @@ -0,0 +1 @@ +tags_test diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile new file mode 100644 index 000000000000..a61b2e743e99 --- /dev/null +++ b/tools/testing/selftests/arm64/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +# ARCH can be overridden by the user for cross compiling +ARCH ?= $(shell uname -m 2>/dev/null || echo not) + +ifneq (,$(filter $(ARCH),aarch64 arm64)) +TEST_GEN_PROGS := tags_test +TEST_PROGS := run_tags_test.sh +endif + +include ../lib.mk diff --git a/tools/testing/selftests/arm64/run_tags_test.sh b/tools/testing/selftests/arm64/run_tags_test.sh new file mode 100755 index 000000000000..745f11379930 --- /dev/null +++ b/tools/testing/selftests/arm64/run_tags_test.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +echo "--------------------" +echo "running tags test" +echo "--------------------" +./tags_test +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi diff --git a/tools/testing/selftests/arm64/tags_test.c b/tools/testing/selftests/arm64/tags_test.c new file mode 100644 index 000000000000..5701163460ef --- /dev/null +++ b/tools/testing/selftests/arm64/tags_test.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/prctl.h> +#include <sys/utsname.h> + +#define SHIFT_TAG(tag) ((uint64_t)(tag) << 56) +#define SET_TAG(ptr, tag) (((uint64_t)(ptr) & ~SHIFT_TAG(0xff)) | \ + SHIFT_TAG(tag)) + +int main(void) +{ + static int tbi_enabled = 0; + unsigned long tag = 0; + struct utsname *ptr; + int err; + + if (prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == 0) + tbi_enabled = 1; + ptr = (struct utsname *)malloc(sizeof(*ptr)); + if (tbi_enabled) + tag = 0x42; + ptr = (struct utsname *)SET_TAG(ptr, tag); + err = uname(ptr); + free(ptr); + + return err; +} |
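As a usage note, the PR_GET_TAGGED_ADDR_CTRL counterpart added to prctl.h above can be used to query the state that tags_test.c enables; a small hypothetical user-space sketch, not part of the series, assuming a libc without the new prctl definitions:

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_GET_TAGGED_ADDR_CTRL
    #define PR_GET_TAGGED_ADDR_CTRL	56
    #endif
    #ifndef PR_TAGGED_ADDR_ENABLE
    #define PR_TAGGED_ADDR_ENABLE	(1UL << 0)
    #endif

    int main(void)
    {
    	/* Unused prctl arguments must be zero or the kernel returns -EINVAL. */
    	int ctrl = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);

    	if (ctrl < 0) {
    		perror("PR_GET_TAGGED_ADDR_CTRL");
    		return 1;
    	}

    	printf("tagged address ABI %s\n",
    	       (ctrl & PR_TAGGED_ADDR_ENABLE) ? "enabled" : "disabled");
    	return 0;
    }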