From 366db3ac3cdf97e90695282b959c75d5ea58cf00 Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Wed, 21 Apr 2021 17:02:20 +0200 Subject: arm64: dts: renesas: aistarvision-mipi-adapter-2.1: Fix CSI40 ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the DTS schema by explicitly stating that the input is port@0. This fixes a schema validation error but has no runtime effect, as the default port number is 0 if not specified. Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210421150221.3202955-2-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Geert Uytterhoeven --- arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts index e7b4a929bb17..2e3d1981cac4 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts +++ b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts @@ -33,7 +33,7 @@ status = "okay"; ports { - port { + port@0 { csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; -- cgit From 0a96c05995ef1085f9c5e6bf005a04915dd2ec6f Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Wed, 21 Apr 2021 17:02:21 +0200 Subject: arm64: dts: renesas: Add port@0 node for all CSI-2 nodes to dtsi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The port@0 node is mandatory; add or move its declaration to the CSI-2 nodes declared in the dtsi files instead of depending on the dts files to add it when describing the external connection. This fixes validation warnings for DTB outputs that do not connect all CSI-2 receivers to transmitters and thus do not declare all port@0 nodes in their dts files.
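For context, the "default port number is 0" behaviour relied on here comes from the OF graph helpers, which fall back to port 0 when a port node carries no reg property. A paraphrased sketch of that lookup (condensed from the of_graph_get_port_by_id() logic; find_port() is an illustrative name, not the upstream function):

#include <linux/of.h>

static struct device_node *find_port(struct device_node *parent, u32 id)
{
	struct device_node *port;

	for_each_child_of_node(parent, port) {
		u32 port_id = 0;

		if (!of_node_name_eq(port, "port"))
			continue;
		/* a port node without a "reg" property is treated as port 0 */
		of_property_read_u32(port, "reg", &port_id);
		if (port_id == id)
			return port;
	}

	return NULL;
}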
Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210421150221.3202955-3-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Geert Uytterhoeven --- .../hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi | 2 -- arch/arm64/boot/dts/renesas/r8a774a1.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a774b1.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a774c0.dtsi | 4 ++++ arch/arm64/boot/dts/renesas/r8a774e1.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77950.dtsi | 4 ++++ arch/arm64/boot/dts/renesas/r8a77951.dtsi | 12 ++++++++++++ arch/arm64/boot/dts/renesas/r8a77960.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77961.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77965.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77970.dtsi | 4 ++++ arch/arm64/boot/dts/renesas/r8a77980.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts | 2 -- arch/arm64/boot/dts/renesas/r8a77990.dtsi | 4 ++++ arch/arm64/boot/dts/renesas/salvator-common.dtsi | 3 --- 15 files changed, 84 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi index c62ddb9b2ba5..3771144a2ce4 100644 --- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi +++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi @@ -14,7 +14,6 @@ ports { port@0 { - reg = <0>; csi20_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; @@ -29,7 +28,6 @@ ports { port@0 { - reg = <0>; csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi index d64fb8b1b86c..46f8dbf68904 100644 --- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi @@ -2573,6 +2573,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2628,6 +2632,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi index 5b05474dc272..d16a4be5ef77 100644 --- a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi @@ -2419,6 +2419,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2474,6 +2478,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi index 20fa3caa050e..1aef34447abd 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi @@ -1823,6 +1823,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi index 8eb006cbd9af..1f51237ab0a6 100644 --- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi @@ -2709,6 +2709,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2764,6 +2768,10 @@ #address-cells = <1>; #size-cells = 
<0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77950.dtsi b/arch/arm64/boot/dts/renesas/r8a77950.dtsi index 25b87da32eeb..b643d3079db1 100644 --- a/arch/arm64/boot/dts/renesas/r8a77950.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77950.dtsi @@ -192,6 +192,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77951.dtsi b/arch/arm64/boot/dts/renesas/r8a77951.dtsi index 5c39152e4570..85d66d15465a 100644 --- a/arch/arm64/boot/dts/renesas/r8a77951.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77951.dtsi @@ -3097,6 +3097,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -3152,6 +3156,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -3191,6 +3199,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77960.dtsi b/arch/arm64/boot/dts/renesas/r8a77960.dtsi index 25d947a81b29..12476e354d74 100644 --- a/arch/arm64/boot/dts/renesas/r8a77960.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77960.dtsi @@ -2761,6 +2761,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2816,6 +2820,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi index ab081f14af9a..d9804768425a 100644 --- a/arch/arm64/boot/dts/renesas/r8a77961.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi @@ -2499,6 +2499,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2554,6 +2558,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi index 657b20d3533b..dcb9df861d74 100644 --- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi @@ -2575,6 +2575,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2630,6 +2634,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi index 5a5d5649332a..e8f6352c3665 100644 --- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi @@ -1106,6 +1106,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi index 1ffa4a995a7a..7b51d464de0e 100644 --- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi @@ -1439,6 +1439,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -1478,6 +1482,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = 
<0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts index 295d34f1d216..4715e4a4abe0 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts +++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts @@ -298,8 +298,6 @@ ports { port@0 { - reg = <0>; - csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi index 5010f23fafcc..0eaea58f4210 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi @@ -1970,6 +1970,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index e18747df219f..453ffcef24fa 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -349,7 +349,6 @@ ports { port@0 { - reg = <0>; csi20_in: endpoint { clock-lanes = <0>; data-lanes = <1>; @@ -364,8 +363,6 @@ ports { port@0 { - reg = <0>; - csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2 3 4>; -- cgit From c019d92457826bb7b2091c86f36adb5de08405f9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 23 Apr 2021 17:09:28 +0200 Subject: openrisc: Fix a memory leak 'setup_find_cpu_node()' takes a reference on the node it returns. This reference must be decremented when not needed anymore, or there will be a leak. Add the missing 'of_node_put(cpu)'. Note that 'setup_cpuinfo()', which also calls this function, already has a correct 'of_node_put(cpu)' at its end. Fixes: 9d02a4283e9c ("OpenRISC: Boot code") Signed-off-by: Christophe JAILLET Signed-off-by: Stafford Horne --- arch/openrisc/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 2416a9f91533..c6f9e7b9f7cb 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -278,6 +278,8 @@ void calibrate_delay(void) pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n", loops_per_jiffy / (500000 / HZ), (loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy); + + of_node_put(cpu); } void __init setup_arch(char **cmdline_p) -- cgit From 4eff124347191d1548eb4e14e20e77513dcbd0fe Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 9 May 2021 12:11:02 +0300 Subject: openrisc: mm/init.c: remove unused memblock_region variable in map_ram() Kernel test robot reports: cppcheck possible warnings: (new ones prefixed by >>, may not real problems) >> arch/openrisc/mm/init.c:125:10: warning: Uninitialized variable: region [uninitvar] region->base, region->base + region->size); ^ Replace usage of memblock_region fields with 'start' and 'end' variables that are initialized in for_each_mem_range() and remove the declaration of region.
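For illustration, the for_each_mem_range() pattern the fix switches to looks roughly like this — a minimal sketch, not the full map_ram() body; print_ram_ranges() is an illustrative name, and the iterator itself (re)initializes start and end on every pass:

#include <linux/memblock.h>

static void __init print_ram_ranges(void)
{
	phys_addr_t start, end;
	u64 i;

	/* start and end are written by the iterator on each iteration */
	for_each_mem_range(i, &start, &end)
		printk(KERN_INFO "%s: Memory: %pa-%pa\n", __func__,
		       &start, &end);
}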
Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Reported-by: kernel test robot Signed-off-by: Mike Rapoport Signed-off-by: Stafford Horne --- arch/openrisc/mm/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index bf9b2310fc93..f3fa02b8838a 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -75,7 +75,6 @@ static void __init map_ram(void) /* These mark extents of read-only kernel pages... * ...from vmlinux.lds.S */ - struct memblock_region *region; v = PAGE_OFFSET; @@ -121,7 +120,7 @@ static void __init map_ram(void) } printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__, - region->base, region->base + region->size); + start, end); } } -- cgit From 371dcaee1ade4b1eefd541ae6ee048b5ce15b37c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 9 May 2021 12:11:03 +0300 Subject: openrisc: mm/init.c: remove unused variable 'end' in paging_init() A build with W=1 enabled produces the following warning: CC arch/openrisc/mm/init.o arch/openrisc/mm/init.c: In function 'paging_init': arch/openrisc/mm/init.c:131:16: warning: variable 'end' set but not used [-Wunused-but-set-variable] 131 | unsigned long end; | ^~~ Remove the unused variable 'end'. Signed-off-by: Mike Rapoport Signed-off-by: Stafford Horne --- arch/openrisc/mm/init.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index f3fa02b8838a..6e38ec96cab8 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -128,7 +128,6 @@ void __init paging_init(void) { extern void tlb_init(void); - unsigned long end; int i; printk(KERN_INFO "Setting up paging and PTEs.\n"); @@ -144,8 +143,6 @@ void __init paging_init(void) */ current_pgd[smp_processor_id()] = init_mm.pgd; - end = (unsigned long)__va(max_low_pfn * PAGE_SIZE); - map_ram(); zone_sizes_init(); -- cgit From be1c2bb3ba5a39c20b1d54e01ffbcb2b1ca7e46c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Apr 2021 09:00:28 +0100 Subject: ARM: PXA: Fix cplds irqdesc allocation when using legacy mode The Mainstone PXA platform uses CONFIG_SPARSE_IRQ, and thus we cannot rely on the irq descriptors to be readily allocated before creating the irqdomain in legacy mode. The kernel then complains loudly about not being able to associate the interrupt in the domain -- can't blame it. Fix it by allocating the irqdescs upfront in the legacy case.
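The ordering that matters, sketched in condensed form (an illustrative rearrangement of the probe path, not the complete cplds_probe(); error handling abridged):

	/* with CONFIG_SPARSE_IRQ, the irq_descs must exist first ... */
	ret = devm_irq_alloc_descs(&pdev->dev, base_irq, base_irq,
				   CPLDS_NB_IRQ, 0);
	if (ret < 0)
		return ret;

	/* ... before the legacy domain tries to associate them */
	fpga->irqdomain = irq_domain_add_legacy(pdev->dev.of_node,
						CPLDS_NB_IRQ, base_irq, 0,
						&cplds_irq_domain_ops, fpga);
	if (!fpga->irqdomain)
		return -ENODEV;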
Fixes: b68761da0111 ("ARM: PXA: Kill use of irq_create_strict_mappings()") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210426223942.GA213931@roeck-us.net --- arch/arm/mach-pxa/pxa_cplds_irqs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c index ec0d9b094744..bddfc7cd5d40 100644 --- a/arch/arm/mach-pxa/pxa_cplds_irqs.c +++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c @@ -121,8 +121,13 @@ static int cplds_probe(struct platform_device *pdev) return fpga->irq; base_irq = platform_get_irq(pdev, 1); - if (base_irq < 0) + if (base_irq < 0) { base_irq = 0; + } else { + ret = devm_irq_alloc_descs(&pdev->dev, base_irq, base_irq, CPLDS_NB_IRQ, 0); + if (ret < 0) + return ret; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); fpga->base = devm_ioremap_resource(&pdev->dev, res); -- cgit From 8b549c18ae81dbc36fb11e4aa08b8378c599ca95 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Apr 2021 14:45:43 +0200 Subject: openrisc: Define memory barrier mb This came up in the discussion of the requirements of qspinlock on an architecture. OpenRISC uses qspinlock, but it was noticed that the memory barrier was not defined. Peter defined it in the mail thread writing: As near as I can tell this should do. The arch spec only lists this one instruction and the text makes it sound like a completion barrier. This is correct, so apply this patch. Signed-off-by: Peter Zijlstra [shorne@gmail.com: Turned the mail into a patch] Signed-off-by: Stafford Horne --- arch/openrisc/include/asm/barrier.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 arch/openrisc/include/asm/barrier.h (limited to 'arch') diff --git a/arch/openrisc/include/asm/barrier.h b/arch/openrisc/include/asm/barrier.h new file mode 100644 index 000000000000..7538294721be --- /dev/null +++ b/arch/openrisc/include/asm/barrier.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_BARRIER_H +#define __ASM_BARRIER_H + +#define mb() asm volatile ("l.msync" ::: "memory") + +#include <asm-generic/barrier.h> + +#endif /* __ASM_BARRIER_H */ -- cgit From 5b9fedb31e476693c90d8ee040e7d4c51b3e7cc4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 17 May 2021 14:39:56 +0200 Subject: quota: Disable quotactl_path syscall In commit fa8b90070a80 ("quota: wire up quotactl_path") we have wired up the new quotactl_path syscall. However, some people in the LWN discussion have objected that the path based syscall is missing the dirfd and flags arguments, which are mostly standard for contemporary path based syscalls. Indeed they have a point, and after a discussion with Christian Brauner and Sascha Hauer I've decided to disable the syscall for now and update its API. Since there is no userspace currently using that syscall and it hasn't been released in any major release, we should be fine.
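For reference, the shape of the objection expressed as userspace-visible prototypes — the first matches the syscall as merged; the second is purely illustrative of the dirfd-plus-flags convention, not a committed API:

/* as merged in fa8b90070a80, now reserved/disabled */
int quotactl_path(const char *mountpoint, int cmd, qid_t id, void *addr);

/* the contemporary path-based convention being asked for (illustrative only) */
int quotactl_path(int dfd, const char *mountpoint, int cmd, qid_t id,
		  void *addr, unsigned int flags);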
CC: Christian Brauner CC: Sascha Hauer Link: https://lore.kernel.org/lkml/20210512153621.n5u43jsytbik4yze@wittgenstein Signed-off-by: Jan Kara --- arch/alpha/kernel/syscalls/syscall.tbl | 2 +- arch/arm/tools/syscall.tbl | 2 +- arch/arm64/include/asm/unistd32.h | 3 +-- arch/ia64/kernel/syscalls/syscall.tbl | 2 +- arch/m68k/kernel/syscalls/syscall.tbl | 2 +- arch/microblaze/kernel/syscalls/syscall.tbl | 2 +- arch/mips/kernel/syscalls/syscall_n32.tbl | 2 +- arch/mips/kernel/syscalls/syscall_n64.tbl | 2 +- arch/mips/kernel/syscalls/syscall_o32.tbl | 2 +- arch/parisc/kernel/syscalls/syscall.tbl | 2 +- arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- arch/s390/kernel/syscalls/syscall.tbl | 2 +- arch/sh/kernel/syscalls/syscall.tbl | 2 +- arch/sparc/kernel/syscalls/syscall.tbl | 2 +- arch/x86/entry/syscalls/syscall_32.tbl | 2 +- arch/x86/entry/syscalls/syscall_64.tbl | 2 +- arch/xtensa/kernel/syscalls/syscall.tbl | 2 +- 17 files changed, 17 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 5622578742fd..3000a2e8ee21 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -482,7 +482,7 @@ 550 common process_madvise sys_process_madvise 551 common epoll_pwait2 sys_epoll_pwait2 552 common mount_setattr sys_mount_setattr -553 common quotactl_path sys_quotactl_path +# 553 reserved for quotactl_path 554 common landlock_create_ruleset sys_landlock_create_ruleset 555 common landlock_add_rule sys_landlock_add_rule 556 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index c7679d7db98b..28e03b5fec00 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -456,7 +456,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 7859749d6628..5dab69d2c22b 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -893,8 +893,7 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) -#define __NR_quotactl_path 443 -__SYSCALL(__NR_quotactl_path, sys_quotactl_path) +/* 443 is reserved for quotactl_path */ #define __NR_landlock_create_ruleset 444 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) #define __NR_landlock_add_rule 445 diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 1ee8e736a48e..bb11fe4c875a 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -363,7 +363,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/m68k/kernel/syscalls/syscall.tbl 
b/arch/m68k/kernel/syscalls/syscall.tbl index 0dd019dc2136..79c2d24c89dd 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -442,7 +442,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 2ac716984ca2..b11395a20c20 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -448,7 +448,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 5e0096657251..9220909526f9 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -381,7 +381,7 @@ 440 n32 process_madvise sys_process_madvise 441 n32 epoll_pwait2 compat_sys_epoll_pwait2 442 n32 mount_setattr sys_mount_setattr -443 n32 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 n32 landlock_create_ruleset sys_landlock_create_ruleset 445 n32 landlock_add_rule sys_landlock_add_rule 446 n32 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 9974f5f8e49b..9cd1c34f31b5 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -357,7 +357,7 @@ 440 n64 process_madvise sys_process_madvise 441 n64 epoll_pwait2 sys_epoll_pwait2 442 n64 mount_setattr sys_mount_setattr -443 n64 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 n64 landlock_create_ruleset sys_landlock_create_ruleset 445 n64 landlock_add_rule sys_landlock_add_rule 446 n64 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 39d6e71e57b6..d560c467a8c6 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -430,7 +430,7 @@ 440 o32 process_madvise sys_process_madvise 441 o32 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 o32 mount_setattr sys_mount_setattr -443 o32 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 o32 landlock_create_ruleset sys_landlock_create_ruleset 445 o32 landlock_add_rule sys_landlock_add_rule 446 o32 landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 5ac80b83d745..aabc37f8cae3 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -440,7 +440,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 
443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 2e68fbb57cc6..8f052ff4058c 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -522,7 +522,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 7e4a2aba366d..0690263df1dd 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -445,7 +445,7 @@ 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index f47a0dc55445..0b91499ebdcf 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -445,7 +445,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index b9e1c0e735b7..e34cc30ef22c 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -488,7 +488,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 28a1423ce32e..4bbc267fb36b 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -447,7 +447,7 @@ 440 i386 process_madvise sys_process_madvise 441 i386 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 i386 mount_setattr sys_mount_setattr -443 i386 quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 i386 landlock_create_ruleset sys_landlock_create_ruleset 445 i386 landlock_add_rule sys_landlock_add_rule 446 i386 landlock_restrict_self sys_landlock_restrict_self diff 
--git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index ecd551b08d05..ce18119ea0d0 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,7 +364,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 9d76d433d3d6..fd2f30227d96 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -413,7 +413,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr -443 common quotactl_path sys_quotactl_path +# 443 reserved for quotactl_path 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self -- cgit From fea63d54f7a3e74f8ab489a8b82413a29849a594 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 17 May 2021 12:42:32 -0500 Subject: x86/sev-es: Move sev_es_put_ghcb() in prep for follow on patch Move the location of sev_es_put_ghcb() in preparation for an update to it in a follow-on patch. This will better highlight the changes being made to the function. No functional change. Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/8c07662ec17d3d82e5c53841a1d9e766d3bdbab6.1621273353.git.thomas.lendacky@amd.com --- arch/x86/kernel/sev.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 9578c82832aa..45e212675811 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -221,24 +221,6 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) return ghcb; } -static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) -{ - struct sev_es_runtime_data *data; - struct ghcb *ghcb; - - data = this_cpu_read(runtime_data); - ghcb = &data->ghcb_page; - - if (state->ghcb) { - /* Restore GHCB from Backup */ - *ghcb = *state->ghcb; - data->backup_ghcb_active = false; - state->ghcb = NULL; - } else { - data->ghcb_active = false; - } -} - /* Needed in vc_early_forward_exception */ void do_early_exception(struct pt_regs *regs, int trapnr); @@ -461,6 +443,24 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt /* Include code shared with pre-decompression boot stage */ #include "sev-shared.c" +static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) +{ + struct sev_es_runtime_data *data; + struct ghcb *ghcb; + + data = this_cpu_read(runtime_data); + ghcb = &data->ghcb_page; + + if (state->ghcb) { + /* Restore GHCB from Backup */ + *ghcb = *state->ghcb; + data->backup_ghcb_active = false; + state->ghcb = NULL; + } else { + data->ghcb_active = false; + } +} + void noinstr __sev_es_nmi_complete(void) { struct ghcb_state state; -- cgit From a50c5bebc99c525e7fbc059988c6a5ab8680cb76 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: 
Mon, 17 May 2021 12:42:33 -0500 Subject: x86/sev-es: Invalidate the GHCB after completing VMGEXIT Since the VMGEXIT instruction can be issued from userspace, invalidate the GHCB after performing VMGEXIT processing in the kernel. Invalidation is only required after userspace is available, so call vc_ghcb_invalidate() from sev_es_put_ghcb(). Update vc_ghcb_invalidate() to additionally clear the GHCB exit code so that it is always presented as 0 when VMGEXIT has been issued by anything else besides the kernel. Fixes: 0786138c78e79 ("x86/sev-es: Add a Runtime #VC Exception Handler") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/5a8130462e4f0057ee1184509cd056eedd78742b.1621273353.git.thomas.lendacky@amd.com --- arch/x86/kernel/sev-shared.c | 1 + arch/x86/kernel/sev.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 6ec8b3bfd76e..9f90f460a28c 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -63,6 +63,7 @@ static bool sev_es_negotiate_protocol(void) static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb) { + ghcb->save.sw_exit_code = 0; memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 45e212675811..4fa111becc93 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -457,6 +457,11 @@ static __always_inline void sev_es_put_ghcb(struct ghcb_state *state) data->backup_ghcb_active = false; state->ghcb = NULL; } else { + /* + * Invalidate the GHCB so a VMGEXIT instruction issued + * from userspace won't appear to be valid. + */ + vc_ghcb_invalidate(ghcb); data->ghcb_active = false; } } -- cgit From 76d0fc5e9bc650766a90cc3ffd2a29248df0f020 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 18 May 2021 10:08:37 +0100 Subject: arm64: Fix stale link in the arch_counter_enforce_ordering() comment With infradead.org archives gone, update the link to lore.kernel.org as these links are deemed stable. Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 2175ec0004ed..451e11e5fd23 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -74,7 +74,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx, * This insanity brought to you by speculative system register reads, * out-of-order memory accesses, sequence locks and Thomas Gleixner. * - * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html + * https://lore.kernel.org/r/alpine.DEB.2.21.1902081950260.1662@nanos.tec.linutronix.de/ */ #define arch_counter_enforce_ordering(val) do { \ u64 tmp, _val = (val); \ -- cgit From 3317c26a4b413b41364f2c4b83c778c6aba1576d Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 30 Apr 2021 13:22:46 +0800 Subject: perf/x86: Avoid touching LBR_TOS MSR for Arch LBR The Architecture LBR does not have MSR_LBR_TOS (0x000001c9). In a guest that should support Architecture LBR, check_msr() then ends up performing an unrelated check on MSR 0x0 (IA32_P5_MC_ADDR), which is also not supported by KVM. The failure will cause x86_pmu.lbr_nr = 0, thereby preventing the initialization of the guest Arch LBR. Fix it by avoiding this extraneous check in intel_pmu_init() for Arch LBR.
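Annotated, the one-line change in the diff below amounts to the following (a restatement with comments, not new logic):

	/* before: lbr_tos is 0 on Arch LBR, so this probed MSR 0x0 and failed */
	if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
		x86_pmu.lbr_nr = 0;

	/* after: a zero lbr_tos (i.e. Arch LBR) skips the probe entirely */
	if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL))
		x86_pmu.lbr_nr = 0;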
Fixes: 47125db27e47 ("perf/x86/intel/lbr: Support Architectural LBR") Signed-off-by: Like Xu [peterz: simpler still] Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20210430052247.3079672-1-like.xu@linux.intel.com --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2521d03de5e0..e28892270c58 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6253,7 +6253,7 @@ __init int intel_pmu_init(void) * Check all LBT MSR here. * Disable LBR access if any LBR MSRs can not be accessed. */ - if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL)) + if (x86_pmu.lbr_tos && !check_msr(x86_pmu.lbr_tos, 0x3UL)) x86_pmu.lbr_nr = 0; for (i = 0; i < x86_pmu.lbr_nr; i++) { if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) && -- cgit From 488e13a489e9707a7e81e1991fdd1f20c0f04689 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Fri, 30 Apr 2021 13:22:47 +0800 Subject: perf/x86/lbr: Remove cpuc->lbr_xsave allocation from atomic context If the kernel is compiled with the CONFIG_LOCKDEP option, the conditional might_sleep_if() deep in kmem_cache_alloc() will generate the following trace, and potentially cause a deadlock when another LBR event is added: [] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:196 [] Call Trace: [] kmem_cache_alloc+0x36/0x250 [] intel_pmu_lbr_add+0x152/0x170 [] x86_pmu_add+0x83/0xd0 Make it symmetric with the release_lbr_buffers() call and mirror the existing DS buffers. Fixes: c085fb8774 ("perf/x86/intel/lbr: Support XSAVES for arch LBR read") Signed-off-by: Like Xu [peterz: simplified] Signed-off-by: Peter Zijlstra (Intel) Tested-by: Kan Liang Link: https://lkml.kernel.org/r/20210430052247.3079672-2-like.xu@linux.intel.com --- arch/x86/events/core.c | 6 ++++-- arch/x86/events/intel/lbr.c | 26 ++++++++++++++++++++------ arch/x86/events/perf_event.h | 6 ++++++ 3 files changed, 30 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8e509325c2c3..8f71dd72ef95 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -396,10 +396,12 @@ int x86_reserve_hardware(void) if (!atomic_inc_not_zero(&pmc_refcount)) { mutex_lock(&pmc_reserve_mutex); if (atomic_read(&pmc_refcount) == 0) { - if (!reserve_pmc_hardware()) + if (!reserve_pmc_hardware()) { err = -EBUSY; - else + } else { reserve_ds_buffers(); + reserve_lbr_buffers(); + } } if (!err) atomic_inc(&pmc_refcount); diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 76dbab6ac9fb..4409d2cccfda 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -658,7 +658,6 @@ static inline bool branch_user_callstack(unsigned br_sel) void intel_pmu_lbr_add(struct perf_event *event) { - struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); if (!x86_pmu.lbr_nr) @@ -696,11 +695,6 @@ void intel_pmu_lbr_add(struct perf_event *event) perf_sched_cb_inc(event->ctx->pmu); if (!cpuc->lbr_users++ && !event->total_time_running) intel_pmu_lbr_reset(); - - if (static_cpu_has(X86_FEATURE_ARCH_LBR) && - kmem_cache && !cpuc->lbr_xsave && - (cpuc->lbr_users != cpuc->lbr_pebs_users)) - cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL); } void release_lbr_buffers(void) @@ -722,6 +716,26 @@ void release_lbr_buffers(void) } } +void reserve_lbr_buffers(void) +{ + struct kmem_cache 
*kmem_cache; + struct cpu_hw_events *cpuc; + int cpu; + + if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) + return; + + for_each_possible_cpu(cpu) { + cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; + if (!kmem_cache || cpuc->lbr_xsave) + continue; + + cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL, + cpu_to_node(cpu)); + } +} + void intel_pmu_lbr_del(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 27fa85e7d4fd..ad87cb36f7c8 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1244,6 +1244,8 @@ void reserve_ds_buffers(void); void release_lbr_buffers(void); +void reserve_lbr_buffers(void); + extern struct event_constraint bts_constraint; extern struct event_constraint vlbr_constraint; @@ -1393,6 +1395,10 @@ static inline void release_lbr_buffers(void) { } +static inline void reserve_lbr_buffers(void) +{ +} + static inline int intel_pmu_init(void) { return 0; -- cgit From d37316b72e8bf95a52d1c3e93c823d128c09b521 Mon Sep 17 00:00:00 2001 From: Jonathan Neuschäfer Date: Tue, 18 May 2021 16:45:14 +0930 Subject: ARM: npcm: wpcm450: select interrupt controller driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The interrupt controller driver is necessary in order to have a functioning Linux system on WPCM450. Select it in mach-npcm/Kconfig. Fixes: ece3fe93e8f4 ("ARM: npcm: Introduce Nuvoton WPCM450 SoC") Signed-off-by: Jonathan Neuschäfer Signed-off-by: Joel Stanley Reviewed-by: Joel Stanley Link: https://lore.kernel.org/r/20210513165627.1767093-1-j.neuschaefer@gmx.net Link: https://lore.kernel.org/r/20210518071514.604492-1-joel@jms.id.au Signed-off-by: Arnd Bergmann --- arch/arm/mach-npcm/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-npcm/Kconfig b/arch/arm/mach-npcm/Kconfig index 658c8efb4ca1..a71cf1d189ae 100644 --- a/arch/arm/mach-npcm/Kconfig +++ b/arch/arm/mach-npcm/Kconfig @@ -10,6 +10,7 @@ config ARCH_WPCM450 bool "Support for WPCM450 BMC (Hermon)" depends on ARCH_MULTI_V5 select CPU_ARM926T + select WPCM450_AIC select NPCM7XX_TIMER help General support for WPCM450 BMC (Hermon). -- cgit From add0b32ef9146a8559a60aed54c37692a5f9d34f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 30 Apr 2021 17:06:01 -0500 Subject: siginfo: Move si_trapno inside the union inside _si_fault It turns out that Linux uses si_trapno very sparingly, and as such it can be considered extra information for a very narrow selection of signals, rather than information that is present with every fault reported in siginfo. As such move si_trapno inside the union inside of _si_fault. This results in no change in placement, and makes it easier to extend _si_fault in the future as this reduces the number of special cases. In particular, with si_trapno included in the union it is no longer a concern that the union must be pointer aligned on most architectures, because the union follows immediately after si_addr, which is a pointer. This change results in a difference in siginfo field placement on sparc and alpha for the fields si_addr_lsb, si_lower, si_upper, si_pkey, and si_perf. These architectures do not implement the signals that would use si_addr_lsb, si_lower, si_upper, si_pkey, and si_perf. Further, these architectures have not yet implemented the userspace that would use si_perf.
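Condensed, the resulting _sigfault layout looks like this (an abridged sketch of include/uapi/asm-generic/siginfo.h with several members elided; the full diff follows below):

	struct {
		void __user *_addr;	/* faulting insn/memory ref. */
		union {
			int _trapno;	/* used on alpha and sparc only */
			short _addr_lsb;	/* BUS_MCEERR_AR / BUS_MCEERR_AO */
			struct { /* ... */ } _addr_bnd;	/* SEGV_BNDERR */
			struct { /* ... */ } _addr_pkey;	/* SEGV_PKUERR */
		};
	} _sigfault;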
The point of this change is in fact to correct these placement issues before sparc or alpha grow userspace that cares. This change was discussed[1] and the agreement is that this change is currently safe. [1]: https://lkml.kernel.org/r/CAK8P3a0+uKYwL1NhY6Hvtieghba2hKYGD6hcKx5n8=4Gtt+pHA@mail.gmail.com Acked-by: Marco Elver v1: https://lkml.kernel.org/r/m1tunns7yf.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210505141101.11519-5-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-1-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" --- arch/x86/kernel/signal_compat.c | 3 +++ include/linux/compat.h | 5 ++--- include/uapi/asm-generic/siginfo.h | 7 ++----- kernel/signal.c | 1 + 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 0e5d0a7e203b..a9fcabd8a5e5 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -127,6 +127,9 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x10); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr) != 0x0C); + BUILD_BUG_ON(offsetof(siginfo_t, si_trapno) != 0x18); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_trapno) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x18); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_addr_lsb) != 0x10); diff --git a/include/linux/compat.h b/include/linux/compat.h index f0d2dd35d408..6af7bef15e94 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -214,12 +214,11 @@ typedef struct compat_siginfo { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */ struct { compat_uptr_t _addr; /* faulting insn/memory ref. */ -#ifdef __ARCH_SI_TRAPNO - int _trapno; /* TRAP # which caused the signal */ -#endif #define __COMPAT_ADDR_BND_PKEY_PAD (__alignof__(compat_uptr_t) < sizeof(short) ? \ sizeof(short) : __alignof__(compat_uptr_t)) union { + /* used on alpha and sparc */ + int _trapno; /* TRAP # which caused the signal */ /* * used when si_code=BUS_MCEERR_AR or * used when si_code=BUS_MCEERR_AO diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index 03d6f6d2c1fe..e663bf117b46 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -63,9 +63,6 @@ union __sifields { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */ struct { void __user *_addr; /* faulting insn/memory ref. */ -#ifdef __ARCH_SI_TRAPNO - int _trapno; /* TRAP # which caused the signal */ -#endif #ifdef __ia64__ int _imm; /* immediate value for "break" */ unsigned int _flags; /* see ia64 si_flags */ @@ -75,6 +72,8 @@ union __sifields { #define __ADDR_BND_PKEY_PAD (__alignof__(void *) < sizeof(short) ? 
\ sizeof(short) : __alignof__(void *)) union { + /* used on alpha and sparc */ + int _trapno; /* TRAP # which caused the signal */ /* * used when si_code=BUS_MCEERR_AR or * used when si_code=BUS_MCEERR_AO @@ -150,9 +149,7 @@ typedef struct siginfo { #define si_int _sifields._rt._sigval.sival_int #define si_ptr _sifields._rt._sigval.sival_ptr #define si_addr _sifields._sigfault._addr -#ifdef __ARCH_SI_TRAPNO #define si_trapno _sifields._sigfault._trapno -#endif #define si_addr_lsb _sifields._sigfault._addr_lsb #define si_lower _sifields._sigfault._addr_bnd._lower #define si_upper _sifields._sigfault._addr_bnd._upper diff --git a/kernel/signal.c b/kernel/signal.c index c3017aa8024a..65888aec65a0 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -4607,6 +4607,7 @@ static inline void siginfo_buildtime_checks(void) /* sigfault */ CHECK_OFFSET(si_addr); + CHECK_OFFSET(si_trapno); CHECK_OFFSET(si_addr_lsb); CHECK_OFFSET(si_lower); CHECK_OFFSET(si_upper); -- cgit From 0683b53197b55343a166f1507086823030809a19 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 2 May 2021 17:28:31 -0500 Subject: signal: Deliver all of the siginfo perf data in _perf Don't abuse si_errno and deliver all of the perf data in the _perf member of siginfo_t. Note: The data field in the perf data structures is a u64, to allow a pointer to be encoded without needing to implement a 32bit and 64bit version of the same structure. There already exist 32bit and 64bit versions of siginfo_t, and the 32bit version can not include a 64bit member as it only has 32bit alignment. So unsigned long is used in siginfo_t instead of a u64, as unsigned long can encode a pointer on all architectures Linux supports. v1: https://lkml.kernel.org/r/m11rarqqx2.fsf_-_@fess.ebiederm.org v2: https://lkml.kernel.org/r/20210503203814.25487-10-ebiederm@xmission.com v3: https://lkml.kernel.org/r/20210505141101.11519-11-ebiederm@xmission.com Link: https://lkml.kernel.org/r/20210517195748.8880-4-ebiederm@xmission.com Reviewed-by: Marco Elver Signed-off-by: "Eric W.
Biederman" --- arch/m68k/kernel/signal.c | 3 ++- arch/x86/kernel/signal_compat.c | 6 ++++-- fs/signalfd.c | 3 ++- include/linux/compat.h | 5 ++++- include/uapi/asm-generic/siginfo.h | 8 ++++++-- include/uapi/linux/perf_event.h | 2 +- include/uapi/linux/signalfd.h | 4 ++-- kernel/signal.c | 21 +++++++++++++-------- .../testing/selftests/perf_events/sigtrap_threads.c | 14 +++++++------- 9 files changed, 41 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index a4b7ee1df211..8f215e79e70e 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -623,7 +623,8 @@ static inline void siginfo_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x12); /* _sigfault._perf */ - BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x14); /* _sigpoll */ BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0c); diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index a9fcabd8a5e5..06743ec054d2 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -141,8 +141,10 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x20); BUILD_BUG_ON(offsetof(compat_siginfo_t, si_pkey) != 0x14); - BUILD_BUG_ON(offsetof(siginfo_t, si_perf) != 0x18); - BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf) != 0x10); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_data) != 0x18); + BUILD_BUG_ON(offsetof(siginfo_t, si_perf_type) != 0x20); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_data) != 0x10); + BUILD_BUG_ON(offsetof(compat_siginfo_t, si_perf_type) != 0x14); CHECK_CSI_OFFSET(_sigpoll); CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); diff --git a/fs/signalfd.c b/fs/signalfd.c index e87e59581653..373df2f12415 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -134,7 +134,8 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, break; case SIL_PERF_EVENT: new.ssi_addr = (long) kinfo->si_addr; - new.ssi_perf = kinfo->si_perf; + new.ssi_perf_type = kinfo->si_perf_type; + new.ssi_perf_data = kinfo->si_perf_data; break; case SIL_CHLD: new.ssi_pid = kinfo->si_pid; diff --git a/include/linux/compat.h b/include/linux/compat.h index 6af7bef15e94..a27fffaae121 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -236,7 +236,10 @@ typedef struct compat_siginfo { u32 _pkey; } _addr_pkey; /* used when si_code=TRAP_PERF */ - compat_ulong_t _perf; + struct { + compat_ulong_t _data; + u32 _type; + } _perf; }; } _sigfault; diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index e663bf117b46..5a3c221f4c9d 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -91,7 +91,10 @@ union __sifields { __u32 _pkey; } _addr_pkey; /* used when si_code=TRAP_PERF */ - unsigned long _perf; + struct { + unsigned long _data; + __u32 _type; + } _perf; }; } _sigfault; @@ -154,7 +157,8 @@ typedef struct siginfo { #define si_lower _sifields._sigfault._addr_bnd._lower #define si_upper _sifields._sigfault._addr_bnd._upper #define si_pkey _sifields._sigfault._addr_pkey._pkey -#define si_perf _sifields._sigfault._perf +#define si_perf_data _sifields._sigfault._perf._data +#define si_perf_type _sifields._sigfault._perf._type #define si_band _sifields._sigpoll._band #define si_fd _sifields._sigpoll._fd #define si_call_addr _sifields._sigsys._call_addr diff --git 
a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index e54e639248c8..7b14753b3d38 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -464,7 +464,7 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via - * siginfo_t::si_perf, e.g. to permit user to identify the event. + * siginfo_t::si_perf_data, e.g. to permit user to identify the event. */ __u64 sig_data; }; diff --git a/include/uapi/linux/signalfd.h b/include/uapi/linux/signalfd.h index 7e333042c7e3..e78dddf433fc 100644 --- a/include/uapi/linux/signalfd.h +++ b/include/uapi/linux/signalfd.h @@ -39,8 +39,8 @@ struct signalfd_siginfo { __s32 ssi_syscall; __u64 ssi_call_addr; __u32 ssi_arch; - __u32 __pad3; - __u64 ssi_perf; + __u32 ssi_perf_type; + __u64 ssi_perf_data; /* * Pad strcture to 128 bytes. Remember to update the diff --git a/kernel/signal.c b/kernel/signal.c index 3a18d13c39b2..dca53515ae3f 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1768,11 +1768,13 @@ int force_sig_perf(void __user *addr, u32 type, u64 sig_data) struct kernel_siginfo info; clear_siginfo(&info); - info.si_signo = SIGTRAP; - info.si_errno = type; - info.si_code = TRAP_PERF; - info.si_addr = addr; - info.si_perf = sig_data; + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_PERF; + info.si_addr = addr; + info.si_perf_data = sig_data; + info.si_perf_type = type; + return force_sig_info(&info); } @@ -3356,7 +3358,8 @@ void copy_siginfo_to_external32(struct compat_siginfo *to, break; case SIL_PERF_EVENT: to->si_addr = ptr_to_compat(from->si_addr); - to->si_perf = from->si_perf; + to->si_perf_data = from->si_perf_data; + to->si_perf_type = from->si_perf_type; break; case SIL_CHLD: to->si_pid = from->si_pid; @@ -3432,7 +3435,8 @@ static int post_copy_siginfo_from_user32(kernel_siginfo_t *to, break; case SIL_PERF_EVENT: to->si_addr = compat_ptr(from->si_addr); - to->si_perf = from->si_perf; + to->si_perf_data = from->si_perf_data; + to->si_perf_type = from->si_perf_type; break; case SIL_CHLD: to->si_pid = from->si_pid; @@ -4615,7 +4619,8 @@ static inline void siginfo_buildtime_checks(void) CHECK_OFFSET(si_lower); CHECK_OFFSET(si_upper); CHECK_OFFSET(si_pkey); - CHECK_OFFSET(si_perf); + CHECK_OFFSET(si_perf_data); + CHECK_OFFSET(si_perf_type); /* sigpoll */ CHECK_OFFSET(si_band); diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c index 78ddf5e11625..8e83cf91513a 100644 --- a/tools/testing/selftests/perf_events/sigtrap_threads.c +++ b/tools/testing/selftests/perf_events/sigtrap_threads.c @@ -43,7 +43,7 @@ static struct { siginfo_t first_siginfo; /* First observed siginfo_t. */ } ctx; -/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */ +/* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */ #define TEST_SIG_DATA(addr) (~(unsigned long)(addr)) static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr) @@ -164,8 +164,8 @@ TEST_F(sigtrap_threads, enable_event) EXPECT_EQ(ctx.signal_count, NUM_THREADS); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); /* Check enabled for parent. 
*/ ctx.iterate_on = 0; @@ -183,8 +183,8 @@ TEST_F(sigtrap_threads, modify_and_enable_event) EXPECT_EQ(ctx.signal_count, NUM_THREADS); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); /* Check enabled for parent. */ ctx.iterate_on = 0; @@ -203,8 +203,8 @@ TEST_F(sigtrap_threads, signal_stress) EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on); EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); - EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); } TEST_HARNESS_MAIN -- cgit From 0024430e920f2900654ad83cd081cf52e02a3ef5 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 18 May 2021 12:01:06 -0700 Subject: x86/build: Fix location of '-plugin-opt=' flags Commit b33fff07e3e3 ("x86, build: allow LTO to be selected") added a couple of '-plugin-opt=' flags to KBUILD_LDFLAGS because the code model and stack alignment are not stored in LLVM bitcode. However, these flags were added to KBUILD_LDFLAGS prior to the emulation flag assignment, which uses ':=', so they were overwritten and never added to $(LD) invocations. The absence of these flags caused misalignment issues in the AMDGPU driver when compiling with CONFIG_LTO_CLANG, resulting in general protection faults. Shuffle the assignment below the initial one so that the flags are properly passed along and all of the linker flags stay together. 
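Reduced to its essence, the ordering bug looks like this illustrative two-line Make fragment (not the literal Makefile text):

KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel   # appended first ...
KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE)            # ... then ':=' re-assigns and silently drops the line above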
At the same time, avoid any future issues with clobbering flags by changing the emulation flag assignment to '+=' since KBUILD_LDFLAGS is already defined with ':=' in the main Makefile before being exported for modification here as a result of commit: ce99d0bf312d ("kbuild: clear LDFLAGS in the top Makefile") Fixes: b33fff07e3e3 ("x86, build: allow LTO to be selected") Reported-by: Anthony Ruhier Signed-off-by: Nathan Chancellor Signed-off-by: Ingo Molnar Tested-by: Anthony Ruhier Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/1374 Link: https://lore.kernel.org/r/20210518190106.60935-1-nathan@kernel.org --- arch/x86/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index c77c5d8a7b3e..307529417021 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -178,11 +178,6 @@ ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,) endif -ifdef CONFIG_LTO_CLANG -KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \ - -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) -endif - # Workaround for a gcc prelease that unfortunately was shipped in a suse release KBUILD_CFLAGS += -Wno-sign-compare # @@ -202,7 +197,12 @@ ifdef CONFIG_RETPOLINE endif endif -KBUILD_LDFLAGS := -m elf_$(UTS_MACHINE) +KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) + +ifdef CONFIG_LTO_CLANG +KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \ + -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8) +endif ifdef CONFIG_X86_NEED_RELOCS LDFLAGS_vmlinux := --emit-relocs --discard-none -- cgit From b250f2f7792d15bcde98e0456781e2835556d5fa Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 19 May 2021 15:52:44 +0200 Subject: x86/sev-es: Don't return NULL from sev_es_get_ghcb() sev_es_get_ghcb() is called from several places but only one of them checks the return value. The reaction to returning NULL is always the same: calling panic() and killing the machine. Instead of adding checks to all call sites, move the panic() into the function itself so that it will no longer return NULL. Fixes: 0786138c78e7 ("x86/sev-es: Add a Runtime #VC Exception Handler") Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # v5.10+ Link: https://lkml.kernel.org/r/20210519135251.30093-2-joro@8bytes.org --- arch/x86/kernel/sev.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 4fa111becc93..82bced88153b 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -203,8 +203,18 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state) if (unlikely(data->ghcb_active)) { /* GHCB is already in use - save its contents */ - if (unlikely(data->backup_ghcb_active)) - return NULL; + if (unlikely(data->backup_ghcb_active)) { + /* + * Backup-GHCB is also already in use. There is no way + * to continue here so just kill the machine. To make + * panic() work, mark GHCBs inactive so that messages + * can be printed out. + */ + data->ghcb_active = false; + data->backup_ghcb_active = false; + + panic("Unable to handle #VC exception!
GHCB and Backup GHCB are already in use"); + } /* Mark backup_ghcb active before writing to it */ data->backup_ghcb_active = true; @@ -1289,7 +1299,6 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) */ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) { - struct sev_es_runtime_data *data = this_cpu_read(runtime_data); irqentry_state_t irq_state; struct ghcb_state state; struct es_em_ctxt ctxt; @@ -1315,16 +1324,6 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) */ ghcb = sev_es_get_ghcb(&state); - if (!ghcb) { - /* - * Mark GHCBs inactive so that panic() is able to print the - * message. - */ - data->ghcb_active = false; - data->backup_ghcb_active = false; - - panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use"); - } vc_ghcb_invalidate(ghcb); result = vc_init_em_ctxt(&ctxt, regs, error_code); -- cgit From c25bbdb564060adaad5c3a8a10765c13487ba6a3 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 19 May 2021 15:52:45 +0200 Subject: x86/sev-es: Forward page-faults which happen during emulation When emulating guest instructions for MMIO or IOIO accesses, the #VC handler might get a page-fault and will not be able to complete. Forward the page-fault in this case to the correct handler instead of killing the machine. Fixes: 0786138c78e7 ("x86/sev-es: Add a Runtime #VC Exception Handler") Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # v5.10+ Link: https://lkml.kernel.org/r/20210519135251.30093-3-joro@8bytes.org --- arch/x86/kernel/sev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 82bced88153b..1f428f401bed 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1270,6 +1270,10 @@ static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt) case X86_TRAP_UD: exc_invalid_op(ctxt->regs); break; + case X86_TRAP_PF: + write_cr2(ctxt->fi.cr2); + exc_page_fault(ctxt->regs, error_code); + break; case X86_TRAP_AC: exc_alignment_check(ctxt->regs, error_code); break; -- cgit From 4954f5b8ef0baf70fe978d1a99a5f70e4dd5c877 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 19 May 2021 15:52:46 +0200 Subject: x86/sev-es: Use __put_user()/__get_user() for data accesses The put_user() and get_user() functions do checks on the address which is passed to them. They check whether the address is actually a user-space address and whether it's fine to access it. They also call might_fault() to indicate that they could fault and possibly sleep. All of these checks are neither wanted nor needed in the #VC exception handler, which can be invoked from almost any context and also for MMIO instructions from kernel space on kernel memory. All the #VC handler wants to know is whether a fault happened when the access was tried. This is provided by __put_user()/__get_user(), which just do the access no matter what. Also add comments explaining why __get_user() and __put_user() are the best choice here and why it is safe to use them in this context. Also explain why copy_to/from_user can't be used. In addition, also revert commit 7024f60d6552 ("x86/sev-es: Handle string port IO to kernel memory properly") because using __get_user()/__put_user() fixes the same problem while the above commit introduced several problems: 1) It uses access_ok() which is only allowed in task context. 2) It uses memcpy() which has no fault handling at all and is thus unsafe to use here. [ bp: Fix up commit ID of the reverted commit above.
] Fixes: f980f9c31a92 ("x86/sev-es: Compile early handler code into kernel image") Signed-off-by: Joerg Roedel Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org # v5.10+ Link: https://lkml.kernel.org/r/20210519135251.30093-4-joro@8bytes.org --- arch/x86/kernel/sev.c | 66 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 1f428f401bed..651b81cd648e 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -315,31 +315,44 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; - /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ - if (!user_mode(ctxt->regs) && !access_ok(target, size)) { - memcpy(dst, buf, size); - return ES_OK; - } - + /* + * This function uses __put_user() independent of whether kernel or user + * memory is accessed. This works fine because __put_user() does no + * sanity checks of the pointer being accessed. All that it does is + * to report when the access failed. + * + * Also, this function runs in atomic context, so __put_user() is not + * allowed to sleep. The page-fault handler detects that it is running + * in atomic context and will not try to take mmap_sem and handle the + * fault, so additional pagefault_enable()/disable() calls are not + * needed. + * + * The access can't be done via copy_to_user() here because + * vc_write_mem() must not use string instructions to access unsafe + * memory. The reason is that MOVS is emulated by the #VC handler by + * splitting the move up into a read and a write and taking a nested #VC + * exception on whatever of them is the MMIO access. Using string + * instructions here would cause infinite nesting. + */ switch (size) { case 1: memcpy(&d1, buf, 1); - if (put_user(d1, target)) + if (__put_user(d1, target)) goto fault; break; case 2: memcpy(&d2, buf, 2); - if (put_user(d2, target)) + if (__put_user(d2, target)) goto fault; break; case 4: memcpy(&d4, buf, 4); - if (put_user(d4, target)) + if (__put_user(d4, target)) goto fault; break; case 8: memcpy(&d8, buf, 8); - if (put_user(d8, target)) + if (__put_user(d8, target)) goto fault; break; default: @@ -370,30 +383,43 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, u16 d2; u8 d1; - /* If instruction ran in kernel mode and the I/O buffer is in kernel space */ - if (!user_mode(ctxt->regs) && !access_ok(s, size)) { - memcpy(buf, src, size); - return ES_OK; - } - + /* + * This function uses __get_user() independent of whether kernel or user + * memory is accessed. This works fine because __get_user() does no + * sanity checks of the pointer being accessed. All that it does is + * to report when the access failed. + * + * Also, this function runs in atomic context, so __get_user() is not + * allowed to sleep. The page-fault handler detects that it is running + * in atomic context and will not try to take mmap_sem and handle the + * fault, so additional pagefault_enable()/disable() calls are not + * needed. + * + * The access can't be done via copy_from_user() here because + * vc_read_mem() must not use string instructions to access unsafe + * memory. The reason is that MOVS is emulated by the #VC handler by + * splitting the move up into a read and a write and taking a nested #VC + * exception on whatever of them is the MMIO access. Using string + * instructions here would cause infinite nesting. 
+ */ switch (size) { case 1: - if (get_user(d1, s)) + if (__get_user(d1, s)) goto fault; memcpy(buf, &d1, 1); break; case 2: - if (get_user(d2, s)) + if (__get_user(d2, s)) goto fault; memcpy(buf, &d2, 2); break; case 4: - if (get_user(d4, s)) + if (__get_user(d4, s)) goto fault; memcpy(buf, &d4, 4); break; case 8: - if (get_user(d8, s)) + if (__get_user(d8, s)) goto fault; memcpy(buf, &d8, 8); break; -- cgit From e2f5efd0f0e229bd110eab513e7c0331d61a4649 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 20 May 2021 13:29:19 +1000 Subject: powerpc: Fix early setup to make early_ioremap() work The immediate problem is that after commit 0bd3f9e953bd ("powerpc/legacy_serial: Use early_ioremap()") the kernel silently reboots on some systems. The reason is that early_ioremap() returns broken addresses as it uses the slot_virt[] array, which is initialized with offsets from FIXADDR_TOP == IOREMAP_END+FIXADDR_SIZE == KERN_IO_END - FIXADDR_SIZE + FIXADDR_SIZE == __kernel_io_end, which is 0 when early_ioremap_setup() is called. __kernel_io_end is initialized a little bit later in early_init_mmu(). This fixes the initialization by swapping early_ioremap_setup() and early_init_mmu(). Fixes: 265c3491c4bc ("powerpc: Add support for GENERIC_EARLY_IOREMAP") Signed-off-by: Alexey Kardashevskiy Reviewed-by: Christophe Leroy [mpe: Drop unrelated cleanup & cleanup change log] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210520032919.358935-1-aik@ozlabs.ru --- arch/powerpc/kernel/setup_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index b779d25761cf..e42b85e4f1aa 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -369,11 +369,11 @@ void __init early_setup(unsigned long dt_ptr) apply_feature_fixups(); setup_feature_keys(); - early_ioremap_setup(); - /* Initialize the hash table or TLB handling */ early_init_mmu(); + early_ioremap_setup(); + /* * After firmware and early platform setup code has set things up, * we note the SPR values for configurable control/performance -- cgit From d72500f992849d31ebae8f821a023660ddd0dcc2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 20 May 2021 21:19:31 +1000 Subject: powerpc/64s/syscall: Fix ptrace syscall info with scv syscalls The scv implementation missed updating the syscall return value and error value get/set functions to deal with the changed register ABI. This broke ptrace PTRACE_GET_SYSCALL_INFO as well as some kernel auditing and tracing functions. Fix. tools/testing/selftests/ptrace/get_syscall_info now passes when scv is used. Fixes: 7fa95f9adaee ("powerpc/64s: system call support for scv/rfscv instructions") Cc: stable@vger.kernel.org # v5.9+ Reported-by: "Dmitry V. Levin" Signed-off-by: Nicholas Piggin Reviewed-by: Dmitry V.
Levin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210520111931.2597127-2-npiggin@gmail.com --- arch/powerpc/include/asm/ptrace.h | 45 ++++++++++++++++++++++---------------- arch/powerpc/include/asm/syscall.h | 42 +++++++++++++++++++++-------------- 2 files changed, 52 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 9c9ab2746168..b476a685f066 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -19,6 +19,7 @@ #ifndef _ASM_POWERPC_PTRACE_H #define _ASM_POWERPC_PTRACE_H +#include #include #include @@ -152,25 +153,6 @@ extern unsigned long profile_pc(struct pt_regs *regs); long do_syscall_trace_enter(struct pt_regs *regs); void do_syscall_trace_leave(struct pt_regs *regs); -#define kernel_stack_pointer(regs) ((regs)->gpr[1]) -static inline int is_syscall_success(struct pt_regs *regs) -{ - return !(regs->ccr & 0x10000000); -} - -static inline long regs_return_value(struct pt_regs *regs) -{ - if (is_syscall_success(regs)) - return regs->gpr[3]; - else - return -regs->gpr[3]; -} - -static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) -{ - regs->gpr[3] = rc; -} - #ifdef __powerpc64__ #define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1) #else @@ -235,6 +217,31 @@ static __always_inline void set_trap_norestart(struct pt_regs *regs) regs->trap |= 0x1; } +#define kernel_stack_pointer(regs) ((regs)->gpr[1]) +static inline int is_syscall_success(struct pt_regs *regs) +{ + if (trap_is_scv(regs)) + return !IS_ERR_VALUE((unsigned long)regs->gpr[3]); + else + return !(regs->ccr & 0x10000000); +} + +static inline long regs_return_value(struct pt_regs *regs) +{ + if (trap_is_scv(regs)) + return regs->gpr[3]; + + if (is_syscall_success(regs)) + return regs->gpr[3]; + else + return -regs->gpr[3]; +} + +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->gpr[3] = rc; +} + #define arch_has_single_step() (1) #define arch_has_block_step() (true) #define ARCH_HAS_USER_SINGLE_STEP_REPORT diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h index fd1b518eed17..ba0f88f3a30d 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -41,11 +41,17 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - /* - * If the system call failed, - * regs->gpr[3] contains a positive ERRORCODE. - */ - return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; + if (trap_is_scv(regs)) { + unsigned long error = regs->gpr[3]; + + return IS_ERR_VALUE(error) ? error : 0; + } else { + /* + * If the system call failed, + * regs->gpr[3] contains a positive ERRORCODE. + */ + return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0; + } } static inline long syscall_get_return_value(struct task_struct *task, @@ -58,18 +64,22 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - /* - * In the general case it's not obvious that we must deal with CCR - * here, as the syscall exit path will also do that for us. However - * there are some places, eg. the signal code, which check ccr to - * decide if the value in r3 is actually an error. 
- */ - if (error) { - regs->ccr |= 0x10000000L; - regs->gpr[3] = error; + if (trap_is_scv(regs)) { + regs->gpr[3] = (long) error ?: val; } else { - regs->ccr &= ~0x10000000L; - regs->gpr[3] = val; + /* + * In the general case it's not obvious that we must deal with + * CCR here, as the syscall exit path will also do that for us. + * However there are some places, eg. the signal code, which + * check ccr to decide if the value in r3 is actually an error. + */ + if (error) { + regs->ccr |= 0x10000000L; + regs->gpr[3] = error; + } else { + regs->ccr &= ~0x10000000L; + regs->gpr[3] = val; + } } } -- cgit From 6bdacdb48e94ff26c03c6eeeef48c03c5e2f7dd4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 12 May 2021 20:57:14 +0200 Subject: bpf: Fix BPF_JIT kconfig symbol dependency Randy reported a randconfig build error recently on i386: ld: arch/x86/net/bpf_jit_comp32.o: in function `do_jit': bpf_jit_comp32.c:(.text+0x28c9): undefined reference to `__bpf_call_base' ld: arch/x86/net/bpf_jit_comp32.o: in function `bpf_int_jit_compile': bpf_jit_comp32.c:(.text+0x3694): undefined reference to `bpf_jit_blind_constants' ld: bpf_jit_comp32.c:(.text+0x3719): undefined reference to `bpf_jit_binary_free' ld: bpf_jit_comp32.c:(.text+0x3745): undefined reference to `bpf_jit_binary_alloc' ld: bpf_jit_comp32.c:(.text+0x37d3): undefined reference to `bpf_jit_prog_release_other' [...] The cause was that b24abcff918a ("bpf, kconfig: Add consolidated menu entry for bpf with core options") moved BPF_JIT from net/Kconfig into kernel/bpf/Kconfig, and previously BPF_JIT was guarded by an 'if NET'. However, there is no actual dependency on NET; it's just that menuconfig NET selects BPF. And the latter in turn causes kernel/bpf/core.o to be built, which contains the above symbols. Randy's randconfig didn't have NET set, and BPF wasn't either, but BPF_JIT, on the other hand, was. Untangle this by making BPF_JIT depend on BPF instead. arm64 was the only arch that pulled in its JIT in net/ via obj-$(CONFIG_NET); all others unconditionally pull this dir in via obj-y. Do the same, since the CONFIG_NET guard there is really useless: the JIT is compiled via obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o anyway. Fixes: b24abcff918a ("bpf, kconfig: Add consolidated menu entry for bpf with core options") Reported-by: Randy Dunlap Signed-off-by: Daniel Borkmann Acked-by: Randy Dunlap Tested-by: Randy Dunlap --- arch/arm64/Kbuild | 3 +-- kernel/bpf/Kconfig | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild index d6465823b281..7b393cfec071 100644 --- a/arch/arm64/Kbuild +++ b/arch/arm64/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += kernel/ mm/ -obj-$(CONFIG_NET) += net/ +obj-y += kernel/ mm/ net/ obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_XEN) += xen/ obj-$(CONFIG_CRYPTO) += crypto/ diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index 26b591e23f16..bd04f4a44c01 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -37,6 +37,7 @@ config BPF_SYSCALL config BPF_JIT bool "Enable BPF Just In Time compiler" + depends on BPF depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT depends on MODULES help -- cgit From ae897fda4f507e4b239f0bdfd578b3688ca96fb4 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 20 May 2021 13:42:42 +0200 Subject: x86/Xen: swap NX determination and GDT setup on BSP xen_setup_gdt(), via xen_load_gdt_boot(), wants to adjust page tables. For this to work when NX is not available, x86_configure_nx() needs to be called first.
[jgross] Note that this is a revert of 36104cb9012a82e73 ("x86/xen: Delay get_cpu_cap until stack canary is established"), which is possible now that we no longer support running as PV guest in 32-bit mode. Cc: # 5.9 Fixes: 36104cb9012a82e73 ("x86/xen: Delay get_cpu_cap until stack canary is established") Reported-by: Olaf Hering Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/12a866b0-9e89-59f7-ebeb-a2a6cec0987a@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten_pv.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 17503fed2017..e87699aa2dc8 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1273,16 +1273,16 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Get mfn list */ xen_build_dynamic_phys_to_machine(); + /* Work out if we support NX */ + get_cpu_cap(&boot_cpu_data); + x86_configure_nx(); + /* * Set up kernel GDT and segment registers, mainly so that * -fstack-protector code can be executed. */ xen_setup_gdt(0); - /* Work out if we support NX */ - get_cpu_cap(&boot_cpu_data); - x86_configure_nx(); - /* Determine virtual and physical address sizes */ get_cpu_address_sizes(&boot_cpu_data); -- cgit From eac2f3059e02382d91f8c887462083841d6ea2a3 Mon Sep 17 00:00:00 2001 From: Chen Huang Date: Thu, 29 Apr 2021 07:03:48 +0000 Subject: riscv: stacktrace: fix the riscv stacktrace when CONFIG_FRAME_POINTER enabled As [1] and [2] said, arch_stack_walk() should not trace itself, or it will show up in the trace unexpectedly when called. For example, when we do "cat /sys/kernel/debug/page_owner", every page reports the same stack: arch_stack_walk+0x18/0x20 stack_trace_save+0x40/0x60 register_dummy_stack+0x24/0x5e init_page_owner+0x2e So use __builtin_frame_address(1) as the first frame to be walked, and mark arch_stack_walk() noinline. We also found that pr_cont will affect the reported stack of pages whose task state is RUNNING when testing "echo t > /proc/sysrq-trigger", so move the pr_cont call and mark dump_backtrace() noinline. Also, move the task == NULL case into the else branch, and test for it with "echo c > /proc/sysrq-trigger".
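For illustration, the frame-address trick can be seen in a minimal user-space sketch (a hypothetical program, not kernel code; a nonzero argument to __builtin_frame_address() is only reliable with frame pointers enabled, matching CONFIG_FRAME_POINTER here):

    #include <stdio.h>

    /* With frame pointers, __builtin_frame_address(1) is the caller's
     * frame pointer, so a stack walk seeded with it starts one level
     * up and the walker itself never appears in the trace.
     */
    static __attribute__((noinline)) void walker(void)
    {
            printf("own fp:    %p\n", __builtin_frame_address(0));
            printf("caller fp: %p\n", __builtin_frame_address(1));
            printf("caller pc: %p\n", __builtin_return_address(0));
    }

    int main(void)
    {
            walker();
            return 0;
    }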
[1] https://lore.kernel.org/lkml/20210319184106.5688-1-mark.rutland@arm.com/ [2] https://lore.kernel.org/lkml/20210317142050.57712-1-chenjun102@huawei.com/ Signed-off-by: Chen Huang Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/stacktrace.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 2b3e0cb90d78..bde85fc53357 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -27,10 +27,10 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame_pointer(regs); sp = user_stack_pointer(regs); pc = instruction_pointer(regs); - } else if (task == NULL || task == current) { - fp = (unsigned long)__builtin_frame_address(0); - sp = sp_in_global; - pc = (unsigned long)walk_stackframe; + } else if (task == current) { + fp = (unsigned long)__builtin_frame_address(1); + sp = (unsigned long)__builtin_frame_address(0); + pc = (unsigned long)__builtin_return_address(0); } else { /* task blocked in __switch_to */ fp = task->thread.s[0]; @@ -106,15 +106,15 @@ static bool print_trace_address(void *arg, unsigned long pc) return true; } -void dump_backtrace(struct pt_regs *regs, struct task_struct *task, +noinline void dump_backtrace(struct pt_regs *regs, struct task_struct *task, const char *loglvl) { - pr_cont("%sCall Trace:\n", loglvl); walk_stackframe(task, regs, print_trace_address, (void *)loglvl); } void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { + pr_cont("%sCall Trace:\n", loglvl); dump_backtrace(NULL, task, loglvl); } @@ -139,7 +139,7 @@ unsigned long get_wchan(struct task_struct *task) #ifdef CONFIG_STACKTRACE -void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, +noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, struct task_struct *task, struct pt_regs *regs) { walk_stackframe(task, regs, consume_entry, cookie); -- cgit From 97a031082320897ee5b06352d0ab3d7cf47321d3 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 7 May 2021 17:47:15 +0800 Subject: riscv: Select ARCH_USE_MEMTEST As of commit dce44566192e ("mm/memtest: add ARCH_USE_MEMTEST"), architectures must select ARCH_USE_MEMTEST to enable CONFIG_MEMTEST. Signed-off-by: Kefeng Wang Fixes: f6e5aedf470b ("riscv: Add support for memtest") Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a8ad8eb76120..c5914e70a0fd 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -34,6 +34,7 @@ config RISCV select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_SUPPORTS_HUGETLBFS if MMU + select ARCH_USE_MEMTEST select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if 64BIT -- cgit From 02ccdeed1817a587161ad091887e11ac8a2586b2 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sat, 8 May 2021 23:43:47 +0800 Subject: riscv: kprobes: Fix build error when MMU=n lkp reported a randconfig failure: arch/riscv/kernel/probes/kprobes.c:90:22: error: use of undeclared identifier 'PAGE_KERNEL_READ_EXEC' We implemented alloc_insn_page() to allocate a PAGE_KERNEL_READ_EXEC page for the kprobes insn page, to satisfy STRICT_MODULE_RWX.
But if MMU=n, we should fall back to the generic weak alloc_insn_page() provided by the generic kprobes subsystem. Fixes: cdd1b2bd358f ("riscv: kprobes: Implement alloc_insn_page()") Signed-off-by: Jisheng Zhang Reported-by: kernel test robot Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/kprobes.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 10b965c34536..15cc65ac7ca6 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -84,6 +84,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } +#ifdef CONFIG_MMU void *alloc_insn_page(void) { return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, @@ -91,6 +92,7 @@ void *alloc_insn_page(void) VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, __builtin_return_address(0)); } +#endif /* install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) -- cgit From bab0d47c0ebb50ae0bcfa4e84986a60113bf7d6b Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sun, 9 May 2021 00:44:43 +0800 Subject: riscv: kexec: Fix W=1 build warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following W=1 build warning(s): In file included from include/linux/kexec.h:28, from arch/riscv/kernel/machine_kexec.c:7: arch/riscv/include/asm/kexec.h:45:1: warning: ‘extern’ is not at beginning of declaration [-Wold-style-declaration] 45 | const extern unsigned char riscv_kexec_relocate[]; | ^~~~~ arch/riscv/include/asm/kexec.h:46:1: warning: ‘extern’ is not at beginning of declaration [-Wold-style-declaration] 46 | const extern unsigned int riscv_kexec_relocate_size; | ^~~~~ arch/riscv/kernel/machine_kexec.c:125:6: warning: no previous prototype for ‘machine_shutdown’ [-Wmissing-prototypes] 125 | void machine_shutdown(void) | ^~~~~~~~~~~~~~~~ arch/riscv/kernel/machine_kexec.c:147:1: warning: no previous prototype for ‘machine_crash_shutdown’ [-Wmissing-prototypes] 147 | machine_crash_shutdown(struct pt_regs *regs) | ^~~~~~~~~~~~~~~~~~~~~~ arch/riscv/kernel/machine_kexec.c:23: warning: Function parameter or member 'image' not described in 'kexec_image_info' arch/riscv/kernel/machine_kexec.c:53: warning: Function parameter or member 'image' not described in 'machine_kexec_prepare' arch/riscv/kernel/machine_kexec.c:114: warning: Function parameter or member 'image' not described in 'machine_kexec_cleanup' arch/riscv/kernel/machine_kexec.c:148: warning: Function parameter or member 'regs' not described in 'machine_crash_shutdown' arch/riscv/kernel/machine_kexec.c:167: warning: Function parameter or member 'image' not described in 'machine_kexec' Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/kexec.h | 4 ++-- arch/riscv/kernel/machine_kexec.c | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index 1e954101906a..e4e291d40759 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -42,8 +42,8 @@ struct kimage_arch { unsigned long fdt_addr; }; -const extern unsigned char riscv_kexec_relocate[]; -const extern unsigned int riscv_kexec_relocate_size; +extern const unsigned char riscv_kexec_relocate[]; +extern const unsigned int riscv_kexec_relocate_size; typedef void (*riscv_kexec_method)(unsigned long first_ind_entry, unsigned long jump_addr, diff --git a/arch/riscv/kernel/machine_kexec.c
b/arch/riscv/kernel/machine_kexec.c index cc048143fba5..9e99e1db156b 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -14,8 +14,9 @@ #include /* For set_memory_x() */ #include /* For unreachable() */ #include /* For cpu_down() */ +#include -/** +/* * kexec_image_info - Print received image details */ static void @@ -39,7 +40,7 @@ kexec_image_info(const struct kimage *image) } } -/** +/* * machine_kexec_prepare - Initialize kexec * * This function is called from do_kexec_load, when the user has @@ -100,7 +101,7 @@ machine_kexec_prepare(struct kimage *image) } -/** +/* * machine_kexec_cleanup - Cleanup any leftovers from * machine_kexec_prepare * @@ -135,7 +136,7 @@ void machine_shutdown(void) #endif } -/** +/* * machine_crash_shutdown - Prepare to kexec after a kernel crash * * This function is called by crash_kexec just before machine_kexec @@ -151,7 +152,7 @@ machine_crash_shutdown(struct pt_regs *regs) pr_info("Starting crashdump kernel...\n"); } -/** +/* * machine_kexec - Jump to the loaded kimage * * This function is called by kernel_kexec which is called by the -- cgit From e69012400b0cb42b2070748322cb72f9effec00f Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 25 May 2021 10:45:51 +0800 Subject: arm64: mm: don't use CON and BLK mapping if KFENCE is enabled When we added KFENCE support for arm64, we intended that it would force the entire linear map to be mapped at page granularity, but we only enforced this in arch_add_memory() and not in map_mem(), so memory mapped at boot time can be mapped at a larger granularity. When booting a kernel with KFENCE=y and RODATA_FULL=n, this results in the following WARNING at boot: [ 0.000000] ------------[ cut here ]------------ [ 0.000000] WARNING: CPU: 0 PID: 0 at mm/memory.c:2462 apply_to_pmd_range+0xec/0x190 [ 0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.13.0-rc1+ #10 [ 0.000000] Hardware name: linux,dummy-virt (DT) [ 0.000000] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO BTYPE=--) [ 0.000000] pc : apply_to_pmd_range+0xec/0x190 [ 0.000000] lr : __apply_to_page_range+0x94/0x170 [ 0.000000] sp : ffffffc010573e20 [ 0.000000] x29: ffffffc010573e20 x28: ffffff801f400000 x27: ffffff801f401000 [ 0.000000] x26: 0000000000000001 x25: ffffff801f400fff x24: ffffffc010573f28 [ 0.000000] x23: ffffffc01002b710 x22: ffffffc0105fa450 x21: ffffffc010573ee4 [ 0.000000] x20: ffffff801fffb7d0 x19: ffffff801f401000 x18: 00000000fffffffe [ 0.000000] x17: 000000000000003f x16: 000000000000000a x15: ffffffc01060b940 [ 0.000000] x14: 0000000000000000 x13: 0098968000000000 x12: 0000000098968000 [ 0.000000] x11: 0000000000000000 x10: 0000000098968000 x9 : 0000000000000001 [ 0.000000] x8 : 0000000000000000 x7 : ffffffc010573ee4 x6 : 0000000000000001 [ 0.000000] x5 : ffffffc010573f28 x4 : ffffffc01002b710 x3 : 0000000040000000 [ 0.000000] x2 : ffffff801f5fffff x1 : 0000000000000001 x0 : 007800005f400705 [ 0.000000] Call trace: [ 0.000000] apply_to_pmd_range+0xec/0x190 [ 0.000000] __apply_to_page_range+0x94/0x170 [ 0.000000] apply_to_page_range+0x10/0x20 [ 0.000000] __change_memory_common+0x50/0xdc [ 0.000000] set_memory_valid+0x30/0x40 [ 0.000000] kfence_init_pool+0x9c/0x16c [ 0.000000] kfence_init+0x20/0x98 [ 0.000000] start_kernel+0x284/0x3f8 Fixes: 840b23986344 ("arm64, kfence: enable KFENCE for ARM64") Cc: # 5.12.x Signed-off-by: Jisheng Zhang Acked-by: Mark Rutland Acked-by: Marco Elver Tested-by: Marco Elver Link: https://lore.kernel.org/r/20210525104551.2ec37f77@xhacker.debian Signed-off-by: Catalin Marinas 
--- arch/arm64/mm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6dd9369e3ea0..89b66ef43a0f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -515,7 +515,8 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); - if (rodata_full || crash_mem_map || debug_pagealloc_enabled()) + if (rodata_full || crash_mem_map || debug_pagealloc_enabled() || + IS_ENABLED(CONFIG_KFENCE)) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* -- cgit From ff4cff962a7eedc73e54b5096693da7f86c61346 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 16 May 2021 17:01:08 -0700 Subject: MIPS: alchemy: xxs1500: add gpio-au1000.h header file board-xxs1500.c references 2 functions without declaring them, so add the header file to placate the build. ../arch/mips/alchemy/board-xxs1500.c: In function 'board_setup': ../arch/mips/alchemy/board-xxs1500.c:56:2: error: implicit declaration of function 'alchemy_gpio1_input_enable' [-Werror=implicit-function-declaration] 56 | alchemy_gpio1_input_enable(); ../arch/mips/alchemy/board-xxs1500.c:57:2: error: implicit declaration of function 'alchemy_gpio2_enable'; did you mean 'alchemy_uart_enable'? [-Werror=implicit-function-declaration] 57 | alchemy_gpio2_enable(); Fixes: 8e026910fcd4 ("MIPS: Alchemy: merge GPR/MTX-1/XXS1500 board code into single files") Signed-off-by: Randy Dunlap Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: Manuel Lauss Cc: Ralf Baechle Acked-by: Manuel Lauss Signed-off-by: Thomas Bogendoerfer --- arch/mips/alchemy/board-xxs1500.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/mips/alchemy/board-xxs1500.c b/arch/mips/alchemy/board-xxs1500.c index b184baa4e56a..f175bce2987f 100644 --- a/arch/mips/alchemy/board-xxs1500.c +++ b/arch/mips/alchemy/board-xxs1500.c @@ -18,6 +18,7 @@ #include #include #include +#include #include const char *get_system_type(void) -- cgit From 6855adc2c5d9dff08be9e6e01deb319738b28780 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 20 May 2021 22:13:43 -0700 Subject: MIPS: launch.h: add include guard to prevent build errors arch/mips/include/asm/mips-boards/launch.h needs an include guard to prevent it from being #included more than once. 
Prevents these build errors: In file included from ../arch/mips/mti-malta/malta-amon.c:16: ../arch/mips/include/asm/mips-boards/launch.h:8:8: error: redefinition of 'struct cpulaunch' 8 | struct cpulaunch { | ^~~~~~~~~ In file included from ../arch/mips/include/asm/mips-cps.h:13, from ../arch/mips/include/asm/smp-ops.h:16, from ../arch/mips/include/asm/smp.h:21, from ../include/linux/smp.h:114, from ../arch/mips/mti-malta/malta-amon.c:12: ../arch/mips/include/asm/mips-boards/launch.h:8:8: note: originally defined here 8 | struct cpulaunch { | ^~~~~~~~~ make[3]: [../scripts/Makefile.build:273: arch/mips/mti-malta/malta-amon.o] Error 1 (ignored) Fixes: 6decd1aad15f ("MIPS: add support for buggy MT7621S core detection") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Cc: Ilya Lipnitskiy Reviewed-by: Ilya Lipnitskiy Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/mips-boards/launch.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/mips/include/asm/mips-boards/launch.h b/arch/mips/include/asm/mips-boards/launch.h index f93aa5ee2e2e..3481ed4c117b 100644 --- a/arch/mips/include/asm/mips-boards/launch.h +++ b/arch/mips/include/asm/mips-boards/launch.h @@ -3,6 +3,9 @@ * */ +#ifndef _ASM_MIPS_BOARDS_LAUNCH_H +#define _ASM_MIPS_BOARDS_LAUNCH_H + #ifndef _ASSEMBLER_ struct cpulaunch { @@ -34,3 +37,5 @@ struct cpulaunch { /* Polling period in count cycles for secondary CPU's */ #define LAUNCHPERIOD 10000 + +#endif /* _ASM_MIPS_BOARDS_LAUNCH_H */ -- cgit From fef532ea0cd871afab7d9a7b6e9da99ac2c24371 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 16 May 2021 17:54:17 -0700 Subject: MIPS: ralink: export rt_sysc_membase for rt2880_wdt.c rt2880_wdt.c uses (well, attempts to use) rt_sysc_membase. However, when this watchdog driver is built as a loadable module, there is a build error since the rt_sysc_membase symbol is not exported. Export it to quell the build error. ERROR: modpost: "rt_sysc_membase" [drivers/watchdog/rt2880_wdt.ko] undefined! 
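For context, a minimal sketch of the export/import pairing that modpost checks (the module-side fragment is illustrative):

    /* core kernel, arch/mips/ralink/of.c: define and export the symbol */
    #include <linux/export.h>

    __iomem void *rt_sysc_membase;
    EXPORT_SYMBOL_GPL(rt_sysc_membase);

    /* loadable module, e.g. rt2880_wdt: only declares the symbol; it is
     * resolved against the kernel's export table at module load time,
     * and modpost verifies the reference at build time.
     */
    extern __iomem void *rt_sysc_membase;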
Fixes: 473cf939ff34 ("watchdog: add ralink watchdog driver") Signed-off-by: Randy Dunlap Cc: Guenter Roeck Cc: Wim Van Sebroeck Cc: John Crispin Cc: linux-mips@vger.kernel.org Cc: linux-watchdog@vger.kernel.org Acked-by: Guenter Roeck Signed-off-by: Thomas Bogendoerfer --- arch/mips/ralink/of.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 0c5de07da097..0135376c5de5 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -25,6 +26,7 @@ __iomem void *rt_sysc_membase; __iomem void *rt_memc_membase; +EXPORT_SYMBOL_GPL(rt_sysc_membase); __iomem void *plat_of_remap_node(const char *node) { -- cgit From 78cf0eb926cb1abeff2106bae67752e032fe5f3e Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 15 May 2021 19:02:01 +0800 Subject: MIPS: Fix kernel hang under FUNCTION_GRAPH_TRACER and PREEMPT_TRACER When updating to the latest mainline kernel with the following three configs, the kernel hangs during startup: (1) CONFIG_FUNCTION_GRAPH_TRACER=y (2) CONFIG_PREEMPT_TRACER=y (3) CONFIG_FTRACE_STARTUP_TEST=y With only the two configs (1) and (2), the kernel starts normally, but it still hangs when executing the following command: echo "function_graph" > /sys/kernel/debug/tracing/current_tracer Without CONFIG_PREEMPT_TRACER=y, both kinds of kernel hang disappear, so at first glance CONFIG_PREEMPT_TRACER seems to have some influence on the function_graph tracer. I used EJTAG to find out that the epc address is related to preempt_enable() in the file arch/mips/lib/mips-atomic.c. Because function tracing can trace the preempt_{enable,disable} calls made there, replace them with preempt_{enable,disable}_notrace to prevent function tracing from going into an infinite loop; this fixes the kernel hang. By the way, this commit can be seen as a complement and improvement of commit f93a1a00f2bd ("MIPS: Fix crash that occurs when function tracing is enabled").
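The loop being avoided can be sketched as follows (an illustrative kernel-style fragment, not the exact call chain):

    #include <linux/compiler.h>
    #include <linux/preempt.h>

    /* notrace hides this function itself from ftrace, but a plain
     * preempt_enable() inside it is still a traceable call:
     *
     *   arch_local_irq_restore()            notrace
     *     preempt_enable()                  traced
     *       function_graph tracer entry
     *         arch_local_irq_save/restore() recursion -> hang
     *
     * The _notrace variants cannot be hooked, which breaks the cycle.
     */
    notrace void example_irq_helper(void)
    {
            preempt_disable_notrace();
            /* ... manipulate the Status register with IRQs masked ... */
            preempt_enable_notrace();
    }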
Signed-off-by: Tiezhu Yang Cc: Steven Rostedt Signed-off-by: Thomas Bogendoerfer --- arch/mips/lib/mips-atomic.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c index de03838b343b..a9b72eacfc0b 100644 --- a/arch/mips/lib/mips-atomic.c +++ b/arch/mips/lib/mips-atomic.c @@ -37,7 +37,7 @@ */ notrace void arch_local_irq_disable(void) { - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -53,7 +53,7 @@ notrace void arch_local_irq_disable(void) : /* no inputs */ : "memory"); - preempt_enable(); + preempt_enable_notrace(); } EXPORT_SYMBOL(arch_local_irq_disable); @@ -61,7 +61,7 @@ notrace unsigned long arch_local_irq_save(void) { unsigned long flags; - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -78,7 +78,7 @@ notrace unsigned long arch_local_irq_save(void) : /* no inputs */ : "memory"); - preempt_enable(); + preempt_enable_notrace(); return flags; } @@ -88,7 +88,7 @@ notrace void arch_local_irq_restore(unsigned long flags) { unsigned long __tmp1; - preempt_disable(); + preempt_disable_notrace(); __asm__ __volatile__( " .set push \n" @@ -106,7 +106,7 @@ notrace void arch_local_irq_restore(unsigned long flags) : "0" (flags) : "memory"); - preempt_enable(); + preempt_enable_notrace(); } EXPORT_SYMBOL(arch_local_irq_restore); -- cgit From e89d6cc51034998607502cd3899173bfa7189571 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 May 2021 09:29:44 +0100 Subject: arm64: assembler: replace `kaddr` with `addr` The `__dcache_op_workaround_clean_cache` and `dcache_by_line_op` macros are only expected to be used on kernel memory, without a user fault fixup, and so we named their address variables `kaddr` to make this clear. Subsequent patches will modify these to also work on user memory with an (optional) user fault fixup, where `kaddr` won't make as much sense. To aid the legibility of patches, this patch (only) replaces `kaddr` with `addr` as a preparatory step. There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland Signed-off-by: Fuad Tabba Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Fuad Tabba Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-2-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 8418c1bd8f04..6a0fbc599196 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -377,47 +377,47 @@ alternative_cb_end /* * Macro to perform a data cache maintenance for the interval - * [kaddr, kaddr + size) + * [addr, addr + size) * * op: operation passed to dc instruction * domain: domain used in dsb instruciton - * kaddr: starting virtual address of the region + * addr: starting virtual address of the region * size: size of the region - * Corrupts: kaddr, size, tmp1, tmp2 + * Corrupts: addr, size, tmp1, tmp2 */ - .macro __dcache_op_workaround_clean_cache, op, kaddr + .macro __dcache_op_workaround_clean_cache, op, addr alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE - dc \op, \kaddr + dc \op, \addr alternative_else - dc civac, \kaddr + dc civac, \addr alternative_endif .endm - .macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2 + .macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2 dcache_line_size \tmp1, \tmp2 - add \size, \kaddr, \size + add \size, \addr, \size sub \tmp2, \tmp1, #1 - bic \kaddr, \kaddr, \tmp2 + bic \addr, \addr, \tmp2 9998: .ifc \op, cvau - __dcache_op_workaround_clean_cache \op, \kaddr + __dcache_op_workaround_clean_cache \op, \addr .else .ifc \op, cvac - __dcache_op_workaround_clean_cache \op, \kaddr + __dcache_op_workaround_clean_cache \op, \addr .else .ifc \op, cvap - sys 3, c7, c12, 1, \kaddr // dc cvap + sys 3, c7, c12, 1, \addr // dc cvap .else .ifc \op, cvadp - sys 3, c7, c13, 1, \kaddr // dc cvadp + sys 3, c7, c13, 1, \addr // dc cvadp .else - dc \op, \kaddr + dc \op, \addr .endif .endif .endif .endif - add \kaddr, \kaddr, \tmp1 - cmp \kaddr, \size + add \addr, \addr, \tmp1 + cmp \addr, \size b.lo 9998b dsb \domain .endm -- cgit From d11b187760f52480dd83bda0429ee3c94e542b1d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 May 2021 09:29:45 +0100 Subject: arm64: assembler: add conditional cache fixups It would be helpful if we could use both `dcache_by_line_op` and `invalidate_icache_by_line` for user memory without accidentally fixing up unexpected faults when performing maintenance on kernel addresses. Let's make this possible by having both macros take an optional fixup label, and only generating an extable entry if a label is provided. At the same time, let's clean up the labels, using \@ to make them globally unique as we do for other macros. There should be no functional change as a result of this patch.
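As background, the extable entry emitted by _asm_extable/_cond_extable is conceptually just a pair of relative offsets; a simplified C view of the arm64 definition at the time:

    /* "If the instruction at `insn` faults, resume at `fixup`." Both
     * fields are offsets relative to the entry itself, which keeps the
     * __ex_table section position-independent.
     */
    struct exception_table_entry {
            int insn;       /* offset to the potentially faulting instruction */
            int fixup;      /* offset to the fixup code */
    };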
Signed-off-by: Mark Rutland Signed-off-by: Fuad Tabba Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Fuad Tabba Cc: Will Deacon Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-3-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 39 +++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 6a0fbc599196..0a276b46ef50 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -130,15 +130,27 @@ alternative_endif .endm /* - * Emit an entry into the exception table + * Create an exception table entry for `insn`, which will branch to `fixup` + * when an unhandled fault is taken. */ - .macro _asm_extable, from, to + .macro _asm_extable, insn, fixup .pushsection __ex_table, "a" .align 3 - .long (\from - .), (\to - .) + .long (\insn - .), (\fixup - .) .popsection .endm +/* + * Create an exception table entry for `insn` if `fixup` is provided. Otherwise + * do nothing. + */ + .macro _cond_extable, insn, fixup + .ifnc \fixup, + _asm_extable \insn, \fixup + .endif + .endm + + #define USER(l, x...) \ 9999: x; \ _asm_extable 9999b, l @@ -383,6 +395,7 @@ alternative_cb_end * domain: domain used in dsb instruciton * addr: starting virtual address of the region * size: size of the region + * fixup: optional label to branch to on user fault * Corrupts: addr, size, tmp1, tmp2 */ .macro __dcache_op_workaround_clean_cache, op, addr @@ -393,12 +406,12 @@ alternative_else alternative_endif .endm - .macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2 + .macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2, fixup dcache_line_size \tmp1, \tmp2 add \size, \addr, \size sub \tmp2, \tmp1, #1 bic \addr, \addr, \tmp2 -9998: +.Ldcache_op\@: .ifc \op, cvau __dcache_op_workaround_clean_cache \op, \addr .else @@ -418,8 +431,10 @@ alternative_endif .endif add \addr, \addr, \tmp1 cmp \addr, \size - b.lo 9998b + b.lo .Ldcache_op\@ dsb \domain + + _cond_extable .Ldcache_op\@, \fixup .endm /* @@ -427,20 +442,22 @@ alternative_endif * [start, end) * * start, end: virtual addresses describing the region - * label: A label to branch to on user fault. + * fixup: optional label to branch to on user fault * Corrupts: tmp1, tmp2 */ - .macro invalidate_icache_by_line start, end, tmp1, tmp2, label + .macro invalidate_icache_by_line start, end, tmp1, tmp2, fixup icache_line_size \tmp1, \tmp2 sub \tmp2, \tmp1, #1 bic \tmp2, \start, \tmp2 -9997: -USER(\label, ic ivau, \tmp2) // invalidate I line PoU +.Licache_op\@: + ic ivau, \tmp2 // invalidate I line PoU add \tmp2, \tmp2, \tmp1 cmp \tmp2, \end - b.lo 9997b + b.lo .Licache_op\@ dsb ish isb + + _cond_extable .Licache_op\@, \fixup .endm /* -- cgit From 46710cf1fcb6235388e8d80619cdf2c196ad554b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:46 +0100 Subject: arm64: Apply errata to swsusp_arch_suspend_exit The Arm errata covered by ARM64_WORKAROUND_CLEAN_CACHE require that "dc cvau" instructions get promoted to "dc civac". 
Reported-by: Mark Rutland Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-4-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate-asm.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 8ccca660034e..0ed2f72a6b94 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -91,7 +91,8 @@ SYM_CODE_START(swsusp_arch_suspend_exit) raw_dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x10, x3 -2: dc cvau, x4 /* clean D line / unified line */ +2: /* clean D line / unified line */ +alternative_insn "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE add x4, x4, x2 cmp x4, x1 b.lo 2b -- cgit From 116b7f559492b719ae4bd22ee773cb7fb046a736 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:47 +0100 Subject: arm64: Do not enable uaccess for flush_icache_range __flush_icache_range works on kernel addresses, and doesn't need uaccess. The existing code is a side-effect of its current implementation, which falls through into __flush_cache_user_range. Instead of falling through to share the code, use a common macro for the two, where the caller specifies an optional fixup label if user access is needed. If provided, this label is used to generate an extable entry. Simplify the code to use dcache_by_line_op, instead of replicating much of its functionality. No functional change intended. Possible performance impact due to the reduced number of instructions. Reported-by: Catalin Marinas Reported-by: Will Deacon Reported-by: Mark Rutland Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/ Link: https://lore.kernel.org/linux-arm-kernel/20210521121846.GB1040@C02TD0UTHF1T.local/ Signed-off-by: Fuad Tabba Acked-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-5-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/mm/cache.S | 57 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 2d881f34dd9d..7c54bcbf5a36 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -14,6 +14,34 @@ #include #include +/* + * __flush_cache_range(start,end) [fixup] + * + * Ensure that the I and D caches are coherent within specified region. + * This is typically used when code has been written to a memory region, + * and will be executed.
+ * + * - start - virtual start address of region + * - end - virtual end address of region + * - fixup - optional label to branch to on user fault + */ +.macro __flush_cache_range, fixup +alternative_if ARM64_HAS_CACHE_IDC + dsb ishst + b .Ldc_skip_\@ +alternative_else_nop_endif + mov x2, x0 + sub x3, x1, x0 + dcache_by_line_op cvau, ish, x2, x3, x4, x5, \fixup +.Ldc_skip_\@: +alternative_if ARM64_HAS_CACHE_DIC + isb + b .Lic_skip_\@ +alternative_else_nop_endif + invalidate_icache_by_line x0, x1, x2, x3, \fixup +.Lic_skip_\@: +.endm + /* * flush_icache_range(start,end) * @@ -25,7 +53,9 @@ * - end - virtual end address of region */ SYM_FUNC_START(__flush_icache_range) - /* FALLTHROUGH */ + __flush_cache_range + ret +SYM_FUNC_END(__flush_icache_range) /* * __flush_cache_user_range(start,end) @@ -39,34 +69,15 @@ SYM_FUNC_START(__flush_icache_range) */ SYM_FUNC_START(__flush_cache_user_range) uaccess_ttbr0_enable x2, x3, x4 -alternative_if ARM64_HAS_CACHE_IDC - dsb ishst - b 7f -alternative_else_nop_endif - dcache_line_size x2, x3 - sub x3, x2, #1 - bic x4, x0, x3 -1: -user_alt 9f, "dc cvau, x4", "dc civac, x4", ARM64_WORKAROUND_CLEAN_CACHE - add x4, x4, x2 - cmp x4, x1 - b.lo 1b - dsb ish -7: -alternative_if ARM64_HAS_CACHE_DIC - isb - b 8f -alternative_else_nop_endif - invalidate_icache_by_line x0, x1, x2, x3, 9f -8: mov x0, #0 + __flush_cache_range 2f + mov x0, xzr 1: uaccess_ttbr0_disable x1, x2 ret -9: +2: mov x0, #-EFAULT b 1b -SYM_FUNC_END(__flush_icache_range) SYM_FUNC_END(__flush_cache_user_range) /* -- cgit From 7908072da535dca52b3a011ed6e1f73534546b59 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:48 +0100 Subject: arm64: Do not enable uaccess for invalidate_icache_range invalidate_icache_range() works on kernel addresses, and doesn't need uaccess. Remove the code that toggles uaccess_ttbr0_enable, as well as the code that emits an entry into the exception table (via the macro invalidate_icache_by_line). Changes return type of invalidate_icache_range() from int (which used to indicate a fault) to void, since it doesn't need uaccess and won't fault. Note that return value was never checked by any of the callers. No functional change intended. Possible performance impact due to the reduced number of instructions. 
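Taken together with the earlier patches, the caller-side pattern now looks roughly like this (hypothetical helper; the prototypes match the <asm/cacheflush.h> hunks shown nearby):

    #include <linux/types.h>
    #include <asm/cacheflush.h>

    /* After writing instructions to buf: clean the D-cache so the new
     * code reaches the point of unification, then invalidate the
     * I-cache so stale lines are refetched. Neither call can fault on
     * kernel addresses, so there is no return value to check.
     */
    static void sync_new_insns(void *buf, size_t len)
    {
            unsigned long start = (unsigned long)buf;

            __flush_dcache_area(buf, len);
            invalidate_icache_range(start, start + len);
    }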
Reported-by: Catalin Marinas Reported-by: Will Deacon Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/ Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Acked-by: Catalin Marinas Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-6-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/mm/cache.S | 11 +---------- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 52e5c1623224..a586afa84172 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -57,7 +57,7 @@ * - size - region size */ extern void __flush_icache_range(unsigned long start, unsigned long end); -extern int invalidate_icache_range(unsigned long start, unsigned long end); +extern void invalidate_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); extern void __inval_dcache_area(void *addr, size_t len); extern void __clean_dcache_area_poc(void *addr, size_t len); diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 7c54bcbf5a36..14eac9d76d57 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -90,21 +90,12 @@ SYM_FUNC_END(__flush_cache_user_range) */ SYM_FUNC_START(invalidate_icache_range) alternative_if ARM64_HAS_CACHE_DIC - mov x0, xzr isb ret alternative_else_nop_endif - uaccess_ttbr0_enable x2, x3, x4 - - invalidate_icache_by_line x0, x1, x2, x3, 2f - mov x0, xzr -1: - uaccess_ttbr0_disable x1, x2 + invalidate_icache_by_line x0, x1, x2, x3 ret -2: - mov x0, #-EFAULT - b 1b SYM_FUNC_END(invalidate_icache_range) /* -- cgit From 5e20e3499682c4f1724438d23afcafd473526a54 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:49 +0100 Subject: arm64: Downgrade flush_icache_range to invalidate Since __flush_dcache_area is called right before, invalidate_icache_range is sufficient in this case. Rewrite the comment to better explain the rationale behind the cache maintenance operations used here. No functional change intended. Possible performance impact due to invalidating only the icache rather than invalidating and cleaning both caches. Reported-by: Catalin Marinas Reported-by: Will Deacon Link: https://lore.kernel.org/linux-arch/20200511110014.lb9PEahJ4hVOYrbwIb_qUHXyNy9KQzNFdb_I3YlzY6A@z/ Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Acked-by: Catalin Marinas Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-7-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/machine_kexec.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 90a335c74442..a03944fd0cd4 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -68,10 +68,14 @@ int machine_kexec_post_load(struct kimage *kimage) kimage->arch.kern_reloc = __pa(reloc_code); kexec_image_info(kimage); - /* Flush the reloc_code in preparation for its execution. */ + /* + * For execution with the MMU off, reloc_code needs to be cleaned to the + * PoC and invalidated from the I-cache. 
+ */ __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size); - flush_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code + - arm64_relocate_new_kernel_size); + invalidate_icache_range((uintptr_t)reloc_code, + (uintptr_t)reloc_code + + arm64_relocate_new_kernel_size); return 0; } -- cgit From 55272ecc3ada8ec947bb5e94ee2fcde6cf31e166 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:50 +0100 Subject: arm64: assembler: remove user_alt user_alt isn't being used anymore. It's also simpler and clearer to directly use alternative_insn and _cond_extable in-line when needed. Reported-by: Mark Rutland Link: https://lore.kernel.org/linux-arm-kernel/20210520125735.GF17233@C02TD0UTHF1T.local/ Signed-off-by: Fuad Tabba Acked-by: Mark Rutland Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-8-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative-macros.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index 8a078fc662ac..477703578caa 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -197,11 +197,6 @@ alternative_endif #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) -.macro user_alt, label, oldinstr, newinstr, cond -9999: alternative_insn "\oldinstr", "\newinstr", \cond - _asm_extable 9999b, \label -.endm - #endif /* __ASSEMBLY__ */ /* -- cgit From 06b7a568ca5e9cb79a0cc4737f498ea90d8fa89d Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:51 +0100 Subject: arm64: Move documentation of dcache_by_line_op The comment describing the macro dcache_by_line_op is placed just before the macro that precedes the one it describes, which is a bit confusing. Move it to the macro it describes (dcache_by_line_op). No functional change intended.
Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-9-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 0a276b46ef50..ced791124b28 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -387,6 +387,14 @@ alternative_cb_end bfi \tcr, \tmp0, \pos, #3 .endm + .macro __dcache_op_workaround_clean_cache, op, addr +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc \op, \addr +alternative_else + dc civac, \addr +alternative_endif + .endm + /* * Macro to perform a data cache maintenance for the interval * [addr, addr + size) @@ -398,14 +406,6 @@ alternative_cb_end * fixup: optional label to branch to on user fault * Corrupts: addr, size, tmp1, tmp2 */ - .macro __dcache_op_workaround_clean_cache, op, addr -alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE - dc \op, \addr -alternative_else - dc civac, \addr -alternative_endif - .endm - .macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2, fixup dcache_line_size \tmp1, \tmp2 add \size, \addr, \size -- cgit From d044f8141847bee542998a6fd8de2c270fe40e48 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:52 +0100 Subject: arm64: Fix comments to refer to correct function __flush_icache_range Many comments refer to the function flush_icache_range, where the intent is in fact __flush_icache_range. Fix these comments to refer to the intended function. That's probably due to commit 3b8c9f1cdfc506e9 ("arm64: IPI each CPU after invalidating the I-cache for kernel mappings"), which renamed flush_icache_range() to __flush_icache_range() and added a wrapper. No functional change intended. Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-10-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/kernel/hibernate-asm.S | 4 ++-- arch/arm64/mm/cache.S | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 0ed2f72a6b94..ef2ab7caf815 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -45,7 +45,7 @@ * Because this code has to be copied to a 'safe' page, it can't call out to * other functions by PC-relative address. Also remember that it may be * mid-way through over-writing other functions. For this reason it contains - * code from flush_icache_range() and uses the copy_page() macro. + * code from __flush_icache_range() and uses the copy_page() macro. * * This 'safe' page is mapped via ttbr0, and executed from there. 
This function * switches to a copy of the linear map in ttbr1, performs the restore, then @@ -87,7 +87,7 @@ SYM_CODE_START(swsusp_arch_suspend_exit) copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 add x1, x10, #PAGE_SIZE - /* Clean the copied page to PoU - based on flush_icache_range() */ + /* Clean the copied page to PoU - based on __flush_icache_range() */ raw_dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x10, x3 diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 14eac9d76d57..910ae8f6a389 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -43,7 +43,7 @@ alternative_else_nop_endif .endm /* - * flush_icache_range(start,end) + * __flush_icache_range(start,end) * * Ensure that the I and D caches are coherent within specified region. * This is typically used when code has been written to a memory region, -- cgit From e3974adb4ef591e898956083a3dfa6336bb88638 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:53 +0100 Subject: arm64: __inval_dcache_area to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. Because the code is shared with __dma_inv_area, it changes the parameters for that as well. However, __dma_inv_area is local to cache.S, so no other users are affected. No functional change intended. Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-11-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/kernel/head.S | 5 +---- arch/arm64/mm/cache.S | 16 +++++++++------- arch/arm64/mm/flush.c | 2 +- 4 files changed, 12 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index a586afa84172..157234706817 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -59,7 +59,7 @@ extern void __flush_icache_range(unsigned long start, unsigned long end); extern void invalidate_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); -extern void __inval_dcache_area(void *addr, size_t len); +extern void __inval_dcache_area(unsigned long start, unsigned long end); extern void __clean_dcache_area_poc(void *addr, size_t len); extern void __clean_dcache_area_pop(void *addr, size_t len); extern void __clean_dcache_area_pou(void *addr, size_t len); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 96873dfa67fd..8df0ac8d9123 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -117,7 +117,7 @@ SYM_CODE_START_LOCAL(preserve_boot_args) dmb sy // needed before dc ivac with // MMU off - mov x1, #0x20 // 4 x 8 bytes + add x1, x0, #0x20 // 4 x 8 bytes b __inval_dcache_area // tail call SYM_CODE_END(preserve_boot_args) @@ -268,7 +268,6 @@ SYM_FUNC_START_LOCAL(__create_page_tables) */ adrp x0, init_pg_dir adrp x1, init_pg_end - sub x1, x1, x0 bl __inval_dcache_area /* @@ -382,12 +381,10 @@ SYM_FUNC_START_LOCAL(__create_page_tables) adrp x0, idmap_pg_dir adrp x1, idmap_pg_end - sub x1, x1, x0 bl __inval_dcache_area adrp x0, init_pg_dir adrp x1, init_pg_end - sub x1, x1, x0 bl __inval_dcache_area ret x28 diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 910ae8f6a389..03c1a7659ffb 100644 --- 
a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -131,25 +131,24 @@ alternative_else_nop_endif SYM_FUNC_END(__clean_dcache_area_pou) /* - * __inval_dcache_area(kaddr, size) + * __inval_dcache_area(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are invalidated. Any partial lines at the ends of the interval are * also cleaned to PoC to prevent data loss. * - * - kaddr - kernel address - * - size - size in question + * - start - kernel start address of region + * - end - kernel end address of region */ SYM_FUNC_START_LOCAL(__dma_inv_area) SYM_FUNC_START_PI(__inval_dcache_area) /* FALLTHROUGH */ /* - * __dma_inv_area(start, size) + * __dma_inv_area(start, end) * - start - virtual start address of region - * - size - size in question + * - end - virtual end address of region */ - add x1, x1, x0 dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -230,8 +229,10 @@ SYM_FUNC_END_PI(__dma_flush_area) * - dir - DMA direction */ SYM_FUNC_START_PI(__dma_map_area) + add x1, x0, x1 cmp w2, #DMA_FROM_DEVICE b.eq __dma_inv_area + sub x1, x1, x0 b __dma_clean_area SYM_FUNC_END_PI(__dma_map_area) @@ -242,6 +243,7 @@ SYM_FUNC_END_PI(__dma_map_area) * - dir - DMA direction */ SYM_FUNC_START_PI(__dma_unmap_area) + add x1, x0, x1 cmp w2, #DMA_TO_DEVICE b.ne __dma_inv_area ret diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 6d44c028d1c9..be650b573b2a 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -90,7 +90,7 @@ EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); void arch_invalidate_pmem(void *addr, size_t size) { - __inval_dcache_area(addr, size); + __inval_dcache_area((unsigned long)addr, (unsigned long)addr + size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); #endif -- cgit From 163d3f80695e31068c7d32244c9e6d406d5c5c00 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:54 +0100 Subject: arm64: dcache_by_line_op to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. No functional change intended. 
Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-12-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 27 +++++++++++++-------------- arch/arm64/kvm/hyp/nvhe/cache.S | 1 + arch/arm64/mm/cache.S | 7 ++++++- 3 files changed, 20 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index ced791124b28..c4cecf85dccf 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -397,40 +397,39 @@ alternative_endif /* * Macro to perform a data cache maintenance for the interval - * [addr, addr + size) + * [start, end) * * op: operation passed to dc instruction * domain: domain used in dsb instruciton - * addr: starting virtual address of the region - * size: size of the region + * start: starting virtual address of the region + * end: end virtual address of the region * fixup: optional label to branch to on user fault - * Corrupts: addr, size, tmp1, tmp2 + * Corrupts: start, end, tmp1, tmp2 */ - .macro dcache_by_line_op op, domain, addr, size, tmp1, tmp2, fixup + .macro dcache_by_line_op op, domain, start, end, tmp1, tmp2, fixup dcache_line_size \tmp1, \tmp2 - add \size, \addr, \size sub \tmp2, \tmp1, #1 - bic \addr, \addr, \tmp2 + bic \start, \start, \tmp2 .Ldcache_op\@: .ifc \op, cvau - __dcache_op_workaround_clean_cache \op, \addr + __dcache_op_workaround_clean_cache \op, \start .else .ifc \op, cvac - __dcache_op_workaround_clean_cache \op, \addr + __dcache_op_workaround_clean_cache \op, \start .else .ifc \op, cvap - sys 3, c7, c12, 1, \addr // dc cvap + sys 3, c7, c12, 1, \start // dc cvap .else .ifc \op, cvadp - sys 3, c7, c13, 1, \addr // dc cvadp + sys 3, c7, c13, 1, \start // dc cvadp .else - dc \op, \addr + dc \op, \start .endif .endif .endif .endif - add \addr, \addr, \tmp1 - cmp \addr, \size + add \start, \start, \tmp1 + cmp \start, \end b.lo .Ldcache_op\@ dsb \domain diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 36cef6915428..3bcfa3cac46f 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -8,6 +8,7 @@ #include SYM_FUNC_START_PI(__flush_dcache_area) + add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__flush_dcache_area) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 03c1a7659ffb..fff883f691f2 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -31,7 +31,7 @@ alternative_if ARM64_HAS_CACHE_IDC b .Ldc_skip_\@ alternative_else_nop_endif mov x2, x0 - sub x3, x1, x0 + mov x3, x1 dcache_by_line_op cvau, ish, x2, x3, x4, x5, \fixup .Ldc_skip_\@: alternative_if ARM64_HAS_CACHE_DIC @@ -108,6 +108,7 @@ SYM_FUNC_END(invalidate_icache_range) * - size - size in question */ SYM_FUNC_START_PI(__flush_dcache_area) + add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__flush_dcache_area) @@ -126,6 +127,7 @@ alternative_if ARM64_HAS_CACHE_IDC dsb ishst ret alternative_else_nop_endif + add x1, x0, x1 dcache_by_line_op cvau, ish, x0, x1, x2, x3 ret SYM_FUNC_END(__clean_dcache_area_pou) @@ -187,6 +189,7 @@ SYM_FUNC_START_PI(__clean_dcache_area_poc) * - start - virtual start address of region * - size - size in question */ + add x1, x0, x1 dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__clean_dcache_area_poc) @@ -205,6 +208,7 @@ SYM_FUNC_START_PI(__clean_dcache_area_pop) 
alternative_if_not ARM64_HAS_DCPOP b __clean_dcache_area_poc alternative_else_nop_endif + add x1, x0, x1 dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__clean_dcache_area_pop) @@ -218,6 +222,7 @@ SYM_FUNC_END_PI(__clean_dcache_area_pop) * - size - size in question */ SYM_FUNC_START_PI(__dma_flush_area) + add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__dma_flush_area) -- cgit From 814b186079cd54d3fe3b6b8ab539cbd44705ef9d Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:55 +0100 Subject: arm64: __flush_dcache_area to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. No functional change intended. Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-13-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_gicv3.h | 3 ++- arch/arm64/include/asm/cacheflush.h | 8 ++++---- arch/arm64/include/asm/efi.h | 2 +- arch/arm64/include/asm/kvm_mmu.h | 3 ++- arch/arm64/kernel/hibernate.c | 18 +++++++++++------- arch/arm64/kernel/idreg-override.c | 3 ++- arch/arm64/kernel/kaslr.c | 12 +++++++++--- arch/arm64/kernel/machine_kexec.c | 20 +++++++++++++------- arch/arm64/kernel/smp.c | 8 ++++++-- arch/arm64/kernel/smp_spin_table.c | 7 ++++--- arch/arm64/kvm/hyp/nvhe/cache.S | 1 - arch/arm64/kvm/hyp/nvhe/setup.c | 3 ++- arch/arm64/kvm/hyp/pgtable.c | 13 ++++++++++--- arch/arm64/mm/cache.S | 9 ++++----- 14 files changed, 70 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 934b9be582d2..ed1cc9d8e6df 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -124,7 +124,8 @@ static inline u32 gic_read_rpr(void) #define gic_read_lpir(c) readq_relaxed(c) #define gic_write_lpir(v, c) writeq_relaxed(v, c) -#define gic_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) +#define gic_flush_dcache_to_poc(a,l) \ + __flush_dcache_area((unsigned long)(a), (unsigned long)(a)+(l)) #define gits_read_baser(c) readq_relaxed(c) #define gits_write_baser(v, c) writeq_relaxed(v, c) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 157234706817..695f88864784 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -50,15 +50,15 @@ * - start - virtual start address * - end - virtual end address * - * __flush_dcache_area(kaddr, size) + * __flush_dcache_area(start, end) * * Ensure that the data held in page is written back. 
- * - kaddr - page address - * - size - region size + * - start - virtual start address + * - end - virtual end address */ extern void __flush_icache_range(unsigned long start, unsigned long end); extern void invalidate_icache_range(unsigned long start, unsigned long end); -extern void __flush_dcache_area(void *addr, size_t len); +extern void __flush_dcache_area(unsigned long start, unsigned long end); extern void __inval_dcache_area(unsigned long start, unsigned long end); extern void __clean_dcache_area_poc(void *addr, size_t len); extern void __clean_dcache_area_pop(void *addr, size_t len); diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 3578aba9c608..0ae2397076fd 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -137,7 +137,7 @@ void efi_virtmap_unload(void); static inline void efi_capsule_flush_cache_range(void *addr, int size) { - __flush_dcache_area(addr, size); + __flush_dcache_area((unsigned long)addr, (unsigned long)addr + size); } #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 25ed956f9af1..33293d5855af 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -180,7 +180,8 @@ static inline void *__kvm_vector_slot2addr(void *base, struct kvm; -#define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) +#define kvm_flush_dcache_to_poc(a,l) \ + __flush_dcache_area((unsigned long)(a), (unsigned long)(a)+(l)) static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index b1cef371df2b..b40ddce71507 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -240,8 +240,6 @@ static int create_safe_exec_page(void *src_start, size_t length, return 0; } -#define dcache_clean_range(start, end) __flush_dcache_area(start, (end - start)) - #ifdef CONFIG_ARM64_MTE static DEFINE_XARRAY(mte_pages); @@ -383,13 +381,18 @@ int swsusp_arch_suspend(void) ret = swsusp_save(); } else { /* Clean kernel core startup/idle code to PoC*/ - dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end); - dcache_clean_range(__idmap_text_start, __idmap_text_end); + __flush_dcache_area((unsigned long)__mmuoff_data_start, + (unsigned long)__mmuoff_data_end); + __flush_dcache_area((unsigned long)__idmap_text_start, + (unsigned long)__idmap_text_end); /* Clean kvm setup code to PoC? */ if (el2_reset_needed()) { - dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end); - dcache_clean_range(__hyp_text_start, __hyp_text_end); + __flush_dcache_area( + (unsigned long)__hyp_idmap_text_start, + (unsigned long)__hyp_idmap_text_end); + __flush_dcache_area((unsigned long)__hyp_text_start, + (unsigned long)__hyp_text_end); } swsusp_mte_restore_tags(); @@ -474,7 +477,8 @@ int swsusp_arch_resume(void) * The hibernate exit text contains a set of el2 vectors, that will * be executed at el2 with the mmu off in order to reload hyp-stub. 
*/ - __flush_dcache_area(hibernate_exit, exit_size); + __flush_dcache_area((unsigned long)hibernate_exit, + (unsigned long)hibernate_exit + exit_size); /* * KASLR will cause the el2 vectors to be in a different location in diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index e628c8ce1ffe..3dd515baf526 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -237,7 +237,8 @@ asmlinkage void __init init_feature_override(void) for (i = 0; i < ARRAY_SIZE(regs); i++) { if (regs[i]->override) - __flush_dcache_area(regs[i]->override, + __flush_dcache_area((unsigned long)regs[i]->override, + (unsigned long)regs[i]->override + sizeof(*regs[i]->override)); } } diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 341342b207f6..49cccd03cb37 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -72,7 +72,9 @@ u64 __init kaslr_early_init(void) * we end up running with module randomization disabled. */ module_alloc_base = (u64)_etext - MODULES_VSIZE; - __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); + __flush_dcache_area((unsigned long)&module_alloc_base, + (unsigned long)&module_alloc_base + + sizeof(module_alloc_base)); /* * Try to map the FDT early. If this fails, we simply bail, @@ -170,8 +172,12 @@ u64 __init kaslr_early_init(void) module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; module_alloc_base &= PAGE_MASK; - __flush_dcache_area(&module_alloc_base, sizeof(module_alloc_base)); - __flush_dcache_area(&memstart_offset_seed, sizeof(memstart_offset_seed)); + __flush_dcache_area((unsigned long)&module_alloc_base, + (unsigned long)&module_alloc_base + + sizeof(module_alloc_base)); + __flush_dcache_area((unsigned long)&memstart_offset_seed, + (unsigned long)&memstart_offset_seed + + sizeof(memstart_offset_seed)); return offset; } diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index a03944fd0cd4..3e79110c8f3a 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -72,7 +72,9 @@ int machine_kexec_post_load(struct kimage *kimage) * For execution with the MMU off, reloc_code needs to be cleaned to the * PoC and invalidated from the I-cache. */ - __flush_dcache_area(reloc_code, arm64_relocate_new_kernel_size); + __flush_dcache_area((unsigned long)reloc_code, + (unsigned long)reloc_code + + arm64_relocate_new_kernel_size); invalidate_icache_range((uintptr_t)reloc_code, (uintptr_t)reloc_code + arm64_relocate_new_kernel_size); @@ -106,16 +108,18 @@ static void kexec_list_flush(struct kimage *kimage) for (entry = &kimage->head; ; entry++) { unsigned int flag; - void *addr; + unsigned long addr; /* flush the list entries. */ - __flush_dcache_area(entry, sizeof(kimage_entry_t)); + __flush_dcache_area((unsigned long)entry, + (unsigned long)entry + + sizeof(kimage_entry_t)); flag = *entry & IND_FLAGS; if (flag == IND_DONE) break; - addr = phys_to_virt(*entry & PAGE_MASK); + addr = (unsigned long)phys_to_virt(*entry & PAGE_MASK); switch (flag) { case IND_INDIRECTION: @@ -124,7 +128,7 @@ static void kexec_list_flush(struct kimage *kimage) break; case IND_SOURCE: /* flush the source pages. 
*/ - __flush_dcache_area(addr, PAGE_SIZE); + __flush_dcache_area(addr, addr + PAGE_SIZE); break; case IND_DESTINATION: break; @@ -151,8 +155,10 @@ static void kexec_segment_flush(const struct kimage *kimage) kimage->segment[i].memsz, kimage->segment[i].memsz / PAGE_SIZE); - __flush_dcache_area(phys_to_virt(kimage->segment[i].mem), - kimage->segment[i].memsz); + __flush_dcache_area( + (unsigned long)phys_to_virt(kimage->segment[i].mem), + (unsigned long)phys_to_virt(kimage->segment[i].mem) + + kimage->segment[i].memsz); } } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dcd7041b2b07..5fcdee331087 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -122,7 +122,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) secondary_data.task = idle; secondary_data.stack = task_stack_page(idle) + THREAD_SIZE; update_cpu_boot_status(CPU_MMU_OFF); - __flush_dcache_area(&secondary_data, sizeof(secondary_data)); + __flush_dcache_area((unsigned long)&secondary_data, + (unsigned long)&secondary_data + + sizeof(secondary_data)); /* Now bring the CPU into our world */ ret = boot_secondary(cpu, idle); @@ -143,7 +145,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_crit("CPU%u: failed to come online\n", cpu); secondary_data.task = NULL; secondary_data.stack = NULL; - __flush_dcache_area(&secondary_data, sizeof(secondary_data)); + __flush_dcache_area((unsigned long)&secondary_data, + (unsigned long)&secondary_data + + sizeof(secondary_data)); status = READ_ONCE(secondary_data.status); if (status == CPU_MMU_OFF) status = READ_ONCE(__early_cpu_boot_status); diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index c45a83512805..58d804582a35 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -36,7 +36,7 @@ static void write_pen_release(u64 val) unsigned long size = sizeof(secondary_holding_pen_release); secondary_holding_pen_release = val; - __flush_dcache_area(start, size); + __flush_dcache_area((unsigned long)start, (unsigned long)start + size); } @@ -90,8 +90,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) * the boot protocol. */ writeq_relaxed(pa_holding_pen, release_addr); - __flush_dcache_area((__force void *)release_addr, - sizeof(*release_addr)); + __flush_dcache_area((__force unsigned long)release_addr, + (__force unsigned long)release_addr + + sizeof(*release_addr)); /* * Send an event to wake up the secondary CPU. 
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 3bcfa3cac46f..36cef6915428 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -8,7 +8,6 @@ #include SYM_FUNC_START_PI(__flush_dcache_area) - add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__flush_dcache_area) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 7488f53b0aa2..5dffe928f256 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -134,7 +134,8 @@ static void update_nvhe_init_params(void) for (i = 0; i < hyp_nr_cpus; i++) { params = per_cpu_ptr(&kvm_init_params, i); params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd); - __flush_dcache_area(params, sizeof(*params)); + __flush_dcache_area((unsigned long)params, + (unsigned long)params + sizeof(*params)); } } diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c37c1dc4feaf..10d2f04013d4 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -839,8 +839,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, stage2_put_pte(ptep, mmu, addr, level, mm_ops); if (need_flush) { - __flush_dcache_area(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); + kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops); + + __flush_dcache_area((unsigned long)pte_follow, + (unsigned long)pte_follow + + kvm_granule_size(level)); } if (childp) @@ -988,11 +991,15 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct kvm_pgtable *pgt = arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; kvm_pte_t pte = *ptep; + kvm_pte_t *pte_follow; if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) return 0; - __flush_dcache_area(kvm_pte_follow(pte, mm_ops), kvm_granule_size(level)); + pte_follow = kvm_pte_follow(pte, mm_ops); + __flush_dcache_area((unsigned long)pte_follow, + (unsigned long)pte_follow + + kvm_granule_size(level)); return 0; } diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index fff883f691f2..b2880aeba7ca 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -99,16 +99,15 @@ alternative_else_nop_endif SYM_FUNC_END(invalidate_icache_range) /* - * __flush_dcache_area(kaddr, size) + * __flush_dcache_area(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are cleaned and invalidated to the PoC. * - * - kaddr - kernel address - * - size - size in question + * - start - virtual start address of region + * - end - virtual end address of region */ SYM_FUNC_START_PI(__flush_dcache_area) - add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__flush_dcache_area) -- cgit From 1f42faf1d25de2ae239f322fda8af1c92c20e953 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:56 +0100 Subject: arm64: __clean_dcache_area_poc to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. Because the code is shared with __dma_clean_area, it changes the parameters for that as well. However, __dma_clean_area is local to cache.S, so no other users are affected. No functional change intended. 
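To make the calling-convention change concrete, a typical conversion of a call site looks like this (a sketch of the pattern applied throughout this series; kaddr and size are placeholder names, not code from this patch):

	/* before: range described by a base address and a size */
	__clean_dcache_area_poc(kaddr, size);

	/* after: range described as [start, end), end computed by the caller */
	__clean_dcache_area_poc((unsigned long)kaddr, (unsigned long)kaddr + size);
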
Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-14-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/kernel/efi-entry.S | 5 +++-- arch/arm64/mm/cache.S | 16 +++++++--------- 3 files changed, 11 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 695f88864784..3255878d6f30 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -60,7 +60,7 @@ extern void __flush_icache_range(unsigned long start, unsigned long end); extern void invalidate_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(unsigned long start, unsigned long end); extern void __inval_dcache_area(unsigned long start, unsigned long end); -extern void __clean_dcache_area_poc(void *addr, size_t len); +extern void __clean_dcache_area_poc(unsigned long start, unsigned long end); extern void __clean_dcache_area_pop(void *addr, size_t len); extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 0073b24b5d25..b0f728fb61f0 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -28,6 +28,7 @@ SYM_CODE_START(efi_enter_kernel) * stale icache entries from before relocation. */ ldr w1, =kernel_size + add x1, x0, x1 bl __clean_dcache_area_poc ic ialluis @@ -36,7 +37,7 @@ SYM_CODE_START(efi_enter_kernel) * so that we can safely disable the MMU and caches. */ adr x0, 0f - ldr w1, 3f + adr x1, 3f bl __clean_dcache_area_poc 0: /* Turn off Dcache and MMU */ @@ -64,5 +65,5 @@ SYM_CODE_START(efi_enter_kernel) mov x2, xzr mov x3, xzr br x19 +3: SYM_CODE_END(efi_enter_kernel) -3: .long . - 0b diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index b2880aeba7ca..e2e2740c55ce 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -171,24 +171,23 @@ SYM_FUNC_END_PI(__inval_dcache_area) SYM_FUNC_END(__dma_inv_area) /* - * __clean_dcache_area_poc(kaddr, size) + * __clean_dcache_area_poc(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoC. 
* - * - kaddr - kernel address - * - size - size in question + * - start - virtual start address of region + * - end - virtual end address of region */ SYM_FUNC_START_LOCAL(__dma_clean_area) SYM_FUNC_START_PI(__clean_dcache_area_poc) /* FALLTHROUGH */ /* - * __dma_clean_area(start, size) + * __dma_clean_area(start, end) * - start - virtual start address of region - * - size - size in question + * - end - virtual end address of region */ - add x1, x0, x1 dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__clean_dcache_area_poc) @@ -204,10 +203,10 @@ SYM_FUNC_END(__dma_clean_area) * - size - size in question */ SYM_FUNC_START_PI(__clean_dcache_area_pop) + add x1, x0, x1 alternative_if_not ARM64_HAS_DCPOP b __clean_dcache_area_poc alternative_else_nop_endif - add x1, x0, x1 dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(__clean_dcache_area_pop) @@ -236,7 +235,6 @@ SYM_FUNC_START_PI(__dma_map_area) add x1, x0, x1 cmp w2, #DMA_FROM_DEVICE b.eq __dma_inv_area - sub x1, x1, x0 b __dma_clean_area SYM_FUNC_END_PI(__dma_map_area) -- cgit From f749448edb9c98bece0aeec5536260a8794af24b Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:57 +0100 Subject: arm64: __clean_dcache_area_pop to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. No functional change intended. Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-15-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/lib/uaccess_flushcache.c | 4 ++-- arch/arm64/mm/cache.S | 9 ++++----- arch/arm64/mm/flush.c | 2 +- 4 files changed, 8 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 3255878d6f30..fa5641868d65 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -61,7 +61,7 @@ extern void invalidate_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(unsigned long start, unsigned long end); extern void __inval_dcache_area(unsigned long start, unsigned long end); extern void __clean_dcache_area_poc(unsigned long start, unsigned long end); -extern void __clean_dcache_area_pop(void *addr, size_t len); +extern void __clean_dcache_area_pop(unsigned long start, unsigned long end); extern void __clean_dcache_area_pou(void *addr, size_t len); extern long __flush_cache_user_range(unsigned long start, unsigned long end); extern void sync_icache_aliases(void *kaddr, unsigned long len); diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c index c83bb5a4aad2..62ea989effe8 100644 --- a/arch/arm64/lib/uaccess_flushcache.c +++ b/arch/arm64/lib/uaccess_flushcache.c @@ -15,7 +15,7 @@ void memcpy_flushcache(void *dst, const void *src, size_t cnt) * barrier to order the cache maintenance against the memcpy. 
*/ memcpy(dst, src, cnt); - __clean_dcache_area_pop(dst, cnt); + __clean_dcache_area_pop((unsigned long)dst, (unsigned long)dst + cnt); } EXPORT_SYMBOL_GPL(memcpy_flushcache); @@ -33,6 +33,6 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from, rc = raw_copy_from_user(to, from, n); /* See above */ - __clean_dcache_area_pop(to, n - rc); + __clean_dcache_area_pop((unsigned long)to, (unsigned long)to + n - rc); return rc; } diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index e2e2740c55ce..b71fcf56516b 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -194,16 +194,15 @@ SYM_FUNC_END_PI(__clean_dcache_area_poc) SYM_FUNC_END(__dma_clean_area) /* - * __clean_dcache_area_pop(kaddr, size) + * __clean_dcache_area_pop(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoP. * - * - kaddr - kernel address - * - size - size in question + * - start - virtual start address of region + * - end - virtual end address of region */ SYM_FUNC_START_PI(__clean_dcache_area_pop) - add x1, x0, x1 alternative_if_not ARM64_HAS_DCPOP b __clean_dcache_area_poc alternative_else_nop_endif diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index be650b573b2a..b2c226d93ca5 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -84,7 +84,7 @@ void arch_wb_cache_pmem(void *addr, size_t size) { /* Ensure order against any prior non-cacheable writes */ dmb(osh); - __clean_dcache_area_pop(addr, size); + __clean_dcache_area_pop((unsigned long)addr, (unsigned long)addr + size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); -- cgit From 406d7d4e2bc76d38a6dc88733a0f72fabf02d305 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:58 +0100 Subject: arm64: __clean_dcache_area_pou to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. No functional change intended. 
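With this patch the D-cache maintenance helpers in cacheflush.h all share the same (start, end) signature. As an illustrative sketch (assuming a struct page *page in scope; not code from this patch), a PoU clean of one page would read:

	unsigned long start = (unsigned long)page_address(page);

	/* clean the page's D-cache lines to the Point of Unification */
	__clean_dcache_area_pou(start, start + PAGE_SIZE);
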
Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-16-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/mm/cache.S | 9 ++++----- arch/arm64/mm/flush.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index fa5641868d65..f86723047315 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -62,7 +62,7 @@ extern void __flush_dcache_area(unsigned long start, unsigned long end); extern void __inval_dcache_area(unsigned long start, unsigned long end); extern void __clean_dcache_area_poc(unsigned long start, unsigned long end); extern void __clean_dcache_area_pop(unsigned long start, unsigned long end); -extern void __clean_dcache_area_pou(void *addr, size_t len); +extern void __clean_dcache_area_pou(unsigned long start, unsigned long end); extern long __flush_cache_user_range(unsigned long start, unsigned long end); extern void sync_icache_aliases(void *kaddr, unsigned long len); diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index b71fcf56516b..ea605d94182f 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -113,20 +113,19 @@ SYM_FUNC_START_PI(__flush_dcache_area) SYM_FUNC_END_PI(__flush_dcache_area) /* - * __clean_dcache_area_pou(kaddr, size) + * __clean_dcache_area_pou(start, end) * - * Ensure that any D-cache lines for the interval [kaddr, kaddr+size) + * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoU. * - * - kaddr - kernel address - * - size - size in question + * - start - virtual start address of region + * - end - virtual end address of region */ SYM_FUNC_START(__clean_dcache_area_pou) alternative_if ARM64_HAS_CACHE_IDC dsb ishst ret alternative_else_nop_endif - add x1, x0, x1 dcache_by_line_op cvau, ish, x0, x1, x2, x3 ret SYM_FUNC_END(__clean_dcache_area_pou) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index b2c226d93ca5..0341bcc6fdf3 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -19,7 +19,7 @@ void sync_icache_aliases(void *kaddr, unsigned long len) unsigned long addr = (unsigned long)kaddr; if (icache_is_aliasing()) { - __clean_dcache_area_pou(kaddr, len); + __clean_dcache_area_pou(kaddr, kaddr + len); __flush_icache_all(); } else { /* -- cgit From 8c28d52ccd1d6e3a5aca8a37e465a5f8b77edbc1 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:29:59 +0100 Subject: arm64: sync_icache_aliases to take end parameter instead of size To be consistent with other functions with similar names and functionality in cacheflush.h, cache.S, and cachetlb.rst, change to specify the range in terms of start and end, as opposed to start and size. No functional change intended. 
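The shape of a converted caller can be seen in the uprobes hunk below; schematically, after copying instructions into a buffer dst of len bytes (placeholder names):

	memcpy(dst, src, len);

	/* make the D-cache and I-cache coherent for the written range */
	sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len);
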
Reported-by: Will Deacon Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-17-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 2 +- arch/arm64/kernel/probes/uprobes.c | 2 +- arch/arm64/mm/flush.c | 21 ++++++++++----------- 3 files changed, 12 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index f86723047315..70b389a8dea5 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -64,7 +64,7 @@ extern void __clean_dcache_area_poc(unsigned long start, unsigned long end); extern void __clean_dcache_area_pop(unsigned long start, unsigned long end); extern void __clean_dcache_area_pou(unsigned long start, unsigned long end); extern long __flush_cache_user_range(unsigned long start, unsigned long end); -extern void sync_icache_aliases(void *kaddr, unsigned long len); +extern void sync_icache_aliases(unsigned long start, unsigned long end); static inline void flush_icache_range(unsigned long start, unsigned long end) { diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index 2c247634552b..9be668f3f034 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -21,7 +21,7 @@ void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, memcpy(dst, src, len); /* flush caches (dcache/icache) */ - sync_icache_aliases(dst, len); + sync_icache_aliases((unsigned long)dst, (unsigned long)dst + len); kunmap_atomic(xol_page_kaddr); } diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 0341bcc6fdf3..c4ca7e05fdb8 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -14,28 +14,25 @@ #include #include -void sync_icache_aliases(void *kaddr, unsigned long len) +void sync_icache_aliases(unsigned long start, unsigned long end) { - unsigned long addr = (unsigned long)kaddr; - if (icache_is_aliasing()) { - __clean_dcache_area_pou(kaddr, kaddr + len); + __clean_dcache_area_pou(start, end); __flush_icache_all(); } else { /* * Don't issue kick_all_cpus_sync() after I-cache invalidation * for user mappings. 
*/ - __flush_icache_range(addr, addr + len); + __flush_icache_range(start, end); } } -static void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, - unsigned long uaddr, void *kaddr, - unsigned long len) +static void flush_ptrace_access(struct vm_area_struct *vma, unsigned long start, + unsigned long end) { if (vma->vm_flags & VM_EXEC) - sync_icache_aliases(kaddr, len); + sync_icache_aliases(start, end); } /* @@ -48,7 +45,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long len) { memcpy(dst, src, len); - flush_ptrace_access(vma, page, uaddr, dst, len); + flush_ptrace_access(vma, (unsigned long)dst, (unsigned long)dst + len); } void __sync_icache_dcache(pte_t pte) @@ -56,7 +53,9 @@ void __sync_icache_dcache(pte_t pte) struct page *page = pte_page(pte); if (!test_bit(PG_dcache_clean, &page->flags)) { - sync_icache_aliases(page_address(page), page_size(page)); + sync_icache_aliases((unsigned long)page_address(page), + (unsigned long)page_address(page) + + page_size(page)); set_bit(PG_dcache_clean, &page->flags); } } -- cgit From 393239be1ba69dcd29be504ffe14938509795821 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:30:00 +0100 Subject: arm64: Fix cache maintenance function comments Fix and expand comments for the cache maintenance functions in cacheflush.h. Adds comments to functions that weren't described before. Explains what the functions do using Arm Architecture Reference Manual terminology. No functional change intended. Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-18-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/cacheflush.h | 47 +++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 70b389a8dea5..26617df1fa45 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -30,31 +30,44 @@ * the implementation assumes non-aliasing VIPT D-cache and (aliasing) * VIPT I-cache. * - * flush_icache_range(start, end) + * All functions below apply to the interval [start, end) + * - start - virtual start address (inclusive) + * - end - virtual end address (exclusive) * - * Ensure coherency between the I-cache and the D-cache in the - * region described by start, end. - * - start - virtual start address - * - end - virtual end address + * __flush_icache_range(start, end) * - * invalidate_icache_range(start, end) - * - * Invalidate the I-cache in the region described by start, end. - * - start - virtual start address - * - end - virtual end address + * Ensure coherency between the I-cache and the D-cache region to + * the Point of Unification. * * __flush_cache_user_range(start, end) * - * Ensure coherency between the I-cache and the D-cache in the - * region described by start, end. - * - start - virtual start address - * - end - virtual end address + * Ensure coherency between the I-cache and the D-cache region to + * the Point of Unification. + * Use only if the region might access user memory. + * + * invalidate_icache_range(start, end) + * + * Invalidate I-cache region to the Point of Unification. * * __flush_dcache_area(start, end) * - * Ensure that the data held in page is written back. - * - start - virtual start address - * - end - virtual end address + * Clean and invalidate D-cache region to the Point of Coherency. 
+ * + * __inval_dcache_area(start, end) + * + * Invalidate D-cache region to the Point of Coherency. + * + * __clean_dcache_area_poc(start, end) + * + * Clean D-cache region to the Point of Coherency. + * + * __clean_dcache_area_pop(start, end) + * + * Clean D-cache region to the Point of Persistence. + * + * __clean_dcache_area_pou(start, end) + * + * Clean D-cache region to the Point of Unification. */ extern void __flush_icache_range(unsigned long start, unsigned long end); extern void invalidate_icache_range(unsigned long start, unsigned long end); -- cgit From fade9c2c6ee2baea7df8e6059b3f143c681e5ce4 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Mon, 24 May 2021 09:30:01 +0100 Subject: arm64: Rename arm64-internal cache maintenance functions Although naming across the codebase isn't that consistent, it tends to follow certain patterns. Moreover, the term "flush" isn't defined in the Arm Architecture reference manual, and might be interpreted to mean clean, invalidate, or both for a cache. Rename arm64-internal functions to make the naming internally consistent, as well as making it consistent with the Arm ARM, by specifying whether it applies to the instruction, data, or both caches, whether the operation is a clean, invalidate, or both. Also specify which point the operation applies to, i.e., to the point of unification (PoU), coherency (PoC), or persistence (PoP). This commit applies the following sed transformation to all files under arch/arm64: "s/\b__flush_cache_range\b/caches_clean_inval_pou_macro/g;"\ "s/\b__flush_icache_range\b/caches_clean_inval_pou/g;"\ "s/\binvalidate_icache_range\b/icache_inval_pou/g;"\ "s/\b__flush_dcache_area\b/dcache_clean_inval_poc/g;"\ "s/\b__inval_dcache_area\b/dcache_inval_poc/g;"\ "s/__clean_dcache_area_poc\b/dcache_clean_poc/g;"\ "s/\b__clean_dcache_area_pop\b/dcache_clean_pop/g;"\ "s/\b__clean_dcache_area_pou\b/dcache_clean_pou/g;"\ "s/\b__flush_cache_user_range\b/caches_clean_inval_user_pou/g;"\ "s/\b__flush_icache_all\b/icache_inval_all_pou/g;" Note that __clean_dcache_area_poc is deliberately missing a word boundary check at the beginning in order to match the efistub symbols in image-vars.h. Also note that, despite its name, __flush_icache_range operates on both instruction and data caches. The name change here reflects that. No functional change intended. 
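Applied to a representative call site, the renaming reads as follows (an illustrative sketch derived from the sed mapping above; va and size are placeholder names):

	/* old names: "flush" is ambiguous and the point is implicit */
	__flush_dcache_area((unsigned long)va, (unsigned long)va + size);
	invalidate_icache_range((unsigned long)va, (unsigned long)va + size);

	/* new names: cache, operation and point are all explicit */
	dcache_clean_inval_poc((unsigned long)va, (unsigned long)va + size);
	icache_inval_pou((unsigned long)va, (unsigned long)va + size);
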
Acked-by: Mark Rutland Signed-off-by: Fuad Tabba Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20210524083001.2586635-19-tabba@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/arch_gicv3.h | 2 +- arch/arm64/include/asm/cacheflush.h | 36 +++++++++++------------ arch/arm64/include/asm/efi.h | 2 +- arch/arm64/include/asm/kvm_mmu.h | 6 ++-- arch/arm64/kernel/alternative.c | 2 +- arch/arm64/kernel/efi-entry.S | 4 +-- arch/arm64/kernel/head.S | 8 ++--- arch/arm64/kernel/hibernate-asm.S | 4 +-- arch/arm64/kernel/hibernate.c | 12 ++++---- arch/arm64/kernel/idreg-override.c | 2 +- arch/arm64/kernel/image-vars.h | 2 +- arch/arm64/kernel/insn.c | 2 +- arch/arm64/kernel/kaslr.c | 6 ++-- arch/arm64/kernel/machine_kexec.c | 10 +++---- arch/arm64/kernel/smp.c | 4 +-- arch/arm64/kernel/smp_spin_table.c | 4 +-- arch/arm64/kernel/sys_compat.c | 2 +- arch/arm64/kvm/arm.c | 2 +- arch/arm64/kvm/hyp/nvhe/cache.S | 4 +-- arch/arm64/kvm/hyp/nvhe/setup.c | 2 +- arch/arm64/kvm/hyp/nvhe/tlb.c | 2 +- arch/arm64/kvm/hyp/pgtable.c | 4 +-- arch/arm64/lib/uaccess_flushcache.c | 4 +-- arch/arm64/mm/cache.S | 58 ++++++++++++++++++------------------- arch/arm64/mm/flush.c | 12 ++++---- 25 files changed, 98 insertions(+), 98 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index ed1cc9d8e6df..4ad22c3135db 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -125,7 +125,7 @@ static inline u32 gic_read_rpr(void) #define gic_write_lpir(v, c) writeq_relaxed(v, c) #define gic_flush_dcache_to_poc(a,l) \ - __flush_dcache_area((unsigned long)(a), (unsigned long)(a)+(l)) + dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l)) #define gits_read_baser(c) readq_relaxed(c) #define gits_write_baser(v, c) writeq_relaxed(v, c) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 26617df1fa45..543c997eb3b7 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -34,54 +34,54 @@ * - start - virtual start address (inclusive) * - end - virtual end address (exclusive) * - * __flush_icache_range(start, end) + * caches_clean_inval_pou(start, end) * * Ensure coherency between the I-cache and the D-cache region to * the Point of Unification. * - * __flush_cache_user_range(start, end) + * caches_clean_inval_user_pou(start, end) * * Ensure coherency between the I-cache and the D-cache region to * the Point of Unification. * Use only if the region might access user memory. * - * invalidate_icache_range(start, end) + * icache_inval_pou(start, end) * * Invalidate I-cache region to the Point of Unification. * - * __flush_dcache_area(start, end) + * dcache_clean_inval_poc(start, end) * * Clean and invalidate D-cache region to the Point of Coherency. * - * __inval_dcache_area(start, end) + * dcache_inval_poc(start, end) * * Invalidate D-cache region to the Point of Coherency. * - * __clean_dcache_area_poc(start, end) + * dcache_clean_poc(start, end) * * Clean D-cache region to the Point of Coherency. * - * __clean_dcache_area_pop(start, end) + * dcache_clean_pop(start, end) * * Clean D-cache region to the Point of Persistence. * - * __clean_dcache_area_pou(start, end) + * dcache_clean_pou(start, end) * * Clean D-cache region to the Point of Unification. 
*/ -extern void __flush_icache_range(unsigned long start, unsigned long end); -extern void invalidate_icache_range(unsigned long start, unsigned long end); -extern void __flush_dcache_area(unsigned long start, unsigned long end); -extern void __inval_dcache_area(unsigned long start, unsigned long end); -extern void __clean_dcache_area_poc(unsigned long start, unsigned long end); -extern void __clean_dcache_area_pop(unsigned long start, unsigned long end); -extern void __clean_dcache_area_pou(unsigned long start, unsigned long end); -extern long __flush_cache_user_range(unsigned long start, unsigned long end); +extern void caches_clean_inval_pou(unsigned long start, unsigned long end); +extern void icache_inval_pou(unsigned long start, unsigned long end); +extern void dcache_clean_inval_poc(unsigned long start, unsigned long end); +extern void dcache_inval_poc(unsigned long start, unsigned long end); +extern void dcache_clean_poc(unsigned long start, unsigned long end); +extern void dcache_clean_pop(unsigned long start, unsigned long end); +extern void dcache_clean_pou(unsigned long start, unsigned long end); +extern long caches_clean_inval_user_pou(unsigned long start, unsigned long end); extern void sync_icache_aliases(unsigned long start, unsigned long end); static inline void flush_icache_range(unsigned long start, unsigned long end) { - __flush_icache_range(start, end); + caches_clean_inval_pou(start, end); /* * IPI all online CPUs so that they undergo a context synchronization @@ -135,7 +135,7 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); -static __always_inline void __flush_icache_all(void) +static __always_inline void icache_inval_all_pou(void) { if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) return; diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 0ae2397076fd..1bed37eb013a 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -137,7 +137,7 @@ void efi_virtmap_unload(void); static inline void efi_capsule_flush_cache_range(void *addr, int size) { - __flush_dcache_area((unsigned long)addr, (unsigned long)addr + size); + dcache_clean_inval_poc((unsigned long)addr, (unsigned long)addr + size); } #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 33293d5855af..f4cbfa9025a8 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -181,7 +181,7 @@ static inline void *__kvm_vector_slot2addr(void *base, struct kvm; #define kvm_flush_dcache_to_poc(a,l) \ - __flush_dcache_area((unsigned long)(a), (unsigned long)(a)+(l)) + dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l)) static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { @@ -209,12 +209,12 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn, { if (icache_is_aliasing()) { /* any kind of VIPT cache */ - __flush_icache_all(); + icache_inval_all_pou(); } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ void *va = page_address(pfn_to_page(pfn)); - invalidate_icache_range((unsigned long)va, + icache_inval_pou((unsigned long)va, (unsigned long)va + size); } } diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index c906d20c7b52..3fb79b76e9d9 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -181,7 +181,7 
@@ static void __nocfi __apply_alternatives(struct alt_region *region, bool is_modu */ if (!is_module) { dsb(ish); - __flush_icache_all(); + icache_inval_all_pou(); isb(); /* Ignore ARM64_CB bit from feature mask */ diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index b0f728fb61f0..61a87fa1c305 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -29,7 +29,7 @@ SYM_CODE_START(efi_enter_kernel) */ ldr w1, =kernel_size add x1, x0, x1 - bl __clean_dcache_area_poc + bl dcache_clean_poc ic ialluis /* @@ -38,7 +38,7 @@ SYM_CODE_START(efi_enter_kernel) */ adr x0, 0f adr x1, 3f - bl __clean_dcache_area_poc + bl dcache_clean_poc 0: /* Turn off Dcache and MMU */ mrs x0, CurrentEL diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 8df0ac8d9123..6928cb67d3a0 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -118,7 +118,7 @@ SYM_CODE_START_LOCAL(preserve_boot_args) // MMU off add x1, x0, #0x20 // 4 x 8 bytes - b __inval_dcache_area // tail call + b dcache_inval_poc // tail call SYM_CODE_END(preserve_boot_args) /* @@ -268,7 +268,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) */ adrp x0, init_pg_dir adrp x1, init_pg_end - bl __inval_dcache_area + bl dcache_inval_poc /* * Clear the init page tables. @@ -381,11 +381,11 @@ SYM_FUNC_START_LOCAL(__create_page_tables) adrp x0, idmap_pg_dir adrp x1, idmap_pg_end - bl __inval_dcache_area + bl dcache_inval_poc adrp x0, init_pg_dir adrp x1, init_pg_end - bl __inval_dcache_area + bl dcache_inval_poc ret x28 SYM_FUNC_END(__create_page_tables) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index ef2ab7caf815..81c0186a5e32 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -45,7 +45,7 @@ * Because this code has to be copied to a 'safe' page, it can't call out to * other functions by PC-relative address. Also remember that it may be * mid-way through over-writing other functions. For this reason it contains - * code from __flush_icache_range() and uses the copy_page() macro. + * code from caches_clean_inval_pou() and uses the copy_page() macro. * * This 'safe' page is mapped via ttbr0, and executed from there. 
This function * switches to a copy of the linear map in ttbr1, performs the restore, then @@ -87,7 +87,7 @@ SYM_CODE_START(swsusp_arch_suspend_exit) copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9 add x1, x10, #PAGE_SIZE - /* Clean the copied page to PoU - based on __flush_icache_range() */ + /* Clean the copied page to PoU - based on caches_clean_inval_pou() */ raw_dcache_line_size x2, x3 sub x3, x2, #1 bic x4, x10, x3 diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index b40ddce71507..46a0b4d6e251 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -210,7 +210,7 @@ static int create_safe_exec_page(void *src_start, size_t length, return -ENOMEM; memcpy(page, src_start, length); - __flush_icache_range((unsigned long)page, (unsigned long)page + length); + caches_clean_inval_pou((unsigned long)page, (unsigned long)page + length); rc = trans_pgd_idmap_page(&trans_info, &trans_ttbr0, &t0sz, page); if (rc) return rc; @@ -381,17 +381,17 @@ int swsusp_arch_suspend(void) ret = swsusp_save(); } else { /* Clean kernel core startup/idle code to PoC*/ - __flush_dcache_area((unsigned long)__mmuoff_data_start, + dcache_clean_inval_poc((unsigned long)__mmuoff_data_start, (unsigned long)__mmuoff_data_end); - __flush_dcache_area((unsigned long)__idmap_text_start, + dcache_clean_inval_poc((unsigned long)__idmap_text_start, (unsigned long)__idmap_text_end); /* Clean kvm setup code to PoC? */ if (el2_reset_needed()) { - __flush_dcache_area( + dcache_clean_inval_poc( (unsigned long)__hyp_idmap_text_start, (unsigned long)__hyp_idmap_text_end); - __flush_dcache_area((unsigned long)__hyp_text_start, + dcache_clean_inval_poc((unsigned long)__hyp_text_start, (unsigned long)__hyp_text_end); } @@ -477,7 +477,7 @@ int swsusp_arch_resume(void) * The hibernate exit text contains a set of el2 vectors, that will * be executed at el2 with the mmu off in order to reload hyp-stub. 
*/ - __flush_dcache_area((unsigned long)hibernate_exit, + dcache_clean_inval_poc((unsigned long)hibernate_exit, (unsigned long)hibernate_exit + exit_size); /* diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 3dd515baf526..53a381a7f65d 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -237,7 +237,7 @@ asmlinkage void __init init_feature_override(void) for (i = 0; i < ARRAY_SIZE(regs); i++) { if (regs[i]->override) - __flush_dcache_area((unsigned long)regs[i]->override, + dcache_clean_inval_poc((unsigned long)regs[i]->override, (unsigned long)regs[i]->override + sizeof(*regs[i]->override)); } diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index bcf3c2755370..c96a9a0043bf 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -35,7 +35,7 @@ __efistub_strnlen = __pi_strnlen; __efistub_strcmp = __pi_strcmp; __efistub_strncmp = __pi_strncmp; __efistub_strrchr = __pi_strrchr; -__efistub___clean_dcache_area_poc = __pi___clean_dcache_area_poc; +__efistub_dcache_clean_poc = __pi_dcache_clean_poc; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) __efistub___memcpy = __pi_memcpy; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 6c0de2f60ea9..51cb8dc98d00 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -198,7 +198,7 @@ int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) ret = aarch64_insn_write(tp, insn); if (ret == 0) - __flush_icache_range((uintptr_t)tp, + caches_clean_inval_pou((uintptr_t)tp, (uintptr_t)tp + AARCH64_INSN_SIZE); return ret; diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 49cccd03cb37..cfa2cfde3019 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -72,7 +72,7 @@ u64 __init kaslr_early_init(void) * we end up running with module randomization disabled. */ module_alloc_base = (u64)_etext - MODULES_VSIZE; - __flush_dcache_area((unsigned long)&module_alloc_base, + dcache_clean_inval_poc((unsigned long)&module_alloc_base, (unsigned long)&module_alloc_base + sizeof(module_alloc_base)); @@ -172,10 +172,10 @@ u64 __init kaslr_early_init(void) module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; module_alloc_base &= PAGE_MASK; - __flush_dcache_area((unsigned long)&module_alloc_base, + dcache_clean_inval_poc((unsigned long)&module_alloc_base, (unsigned long)&module_alloc_base + sizeof(module_alloc_base)); - __flush_dcache_area((unsigned long)&memstart_offset_seed, + dcache_clean_inval_poc((unsigned long)&memstart_offset_seed, (unsigned long)&memstart_offset_seed + sizeof(memstart_offset_seed)); diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 3e79110c8f3a..03ceabe4d912 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -72,10 +72,10 @@ int machine_kexec_post_load(struct kimage *kimage) * For execution with the MMU off, reloc_code needs to be cleaned to the * PoC and invalidated from the I-cache. */ - __flush_dcache_area((unsigned long)reloc_code, + dcache_clean_inval_poc((unsigned long)reloc_code, (unsigned long)reloc_code + arm64_relocate_new_kernel_size); - invalidate_icache_range((uintptr_t)reloc_code, + icache_inval_pou((uintptr_t)reloc_code, (uintptr_t)reloc_code + arm64_relocate_new_kernel_size); @@ -111,7 +111,7 @@ static void kexec_list_flush(struct kimage *kimage) unsigned long addr; /* flush the list entries. 
*/ - __flush_dcache_area((unsigned long)entry, + dcache_clean_inval_poc((unsigned long)entry, (unsigned long)entry + sizeof(kimage_entry_t)); @@ -128,7 +128,7 @@ static void kexec_list_flush(struct kimage *kimage) break; case IND_SOURCE: /* flush the source pages. */ - __flush_dcache_area(addr, addr + PAGE_SIZE); + dcache_clean_inval_poc(addr, addr + PAGE_SIZE); break; case IND_DESTINATION: break; @@ -155,7 +155,7 @@ static void kexec_segment_flush(const struct kimage *kimage) kimage->segment[i].memsz, kimage->segment[i].memsz / PAGE_SIZE); - __flush_dcache_area( + dcache_clean_inval_poc( (unsigned long)phys_to_virt(kimage->segment[i].mem), (unsigned long)phys_to_virt(kimage->segment[i].mem) + kimage->segment[i].memsz); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5fcdee331087..9b4c1118194d 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -122,7 +122,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) secondary_data.task = idle; secondary_data.stack = task_stack_page(idle) + THREAD_SIZE; update_cpu_boot_status(CPU_MMU_OFF); - __flush_dcache_area((unsigned long)&secondary_data, + dcache_clean_inval_poc((unsigned long)&secondary_data, (unsigned long)&secondary_data + sizeof(secondary_data)); @@ -145,7 +145,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_crit("CPU%u: failed to come online\n", cpu); secondary_data.task = NULL; secondary_data.stack = NULL; - __flush_dcache_area((unsigned long)&secondary_data, + dcache_clean_inval_poc((unsigned long)&secondary_data, (unsigned long)&secondary_data + sizeof(secondary_data)); status = READ_ONCE(secondary_data.status); diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 58d804582a35..7e1624ecab3c 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -36,7 +36,7 @@ static void write_pen_release(u64 val) unsigned long size = sizeof(secondary_holding_pen_release); secondary_holding_pen_release = val; - __flush_dcache_area((unsigned long)start, (unsigned long)start + size); + dcache_clean_inval_poc((unsigned long)start, (unsigned long)start + size); } @@ -90,7 +90,7 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) * the boot protocol. 
*/ writeq_relaxed(pa_holding_pen, release_addr); - __flush_dcache_area((__force unsigned long)release_addr, + dcache_clean_inval_poc((__force unsigned long)release_addr, (__force unsigned long)release_addr + sizeof(*release_addr)); diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 265fe3eb1069..db5159a3055f 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -41,7 +41,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end) dsb(ish); } - ret = __flush_cache_user_range(start, start + chunk); + ret = caches_clean_inval_user_pou(start, start + chunk); if (ret) return ret; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 1cb39c0803a4..c1953f65ca0e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1064,7 +1064,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) stage2_unmap_vm(vcpu->kvm); else - __flush_icache_all(); + icache_inval_all_pou(); } vcpu_reset_hcr(vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 36cef6915428..958734f4d6b0 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -7,7 +7,7 @@ #include #include -SYM_FUNC_START_PI(__flush_dcache_area) +SYM_FUNC_START_PI(dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__flush_dcache_area) +SYM_FUNC_END_PI(dcache_clean_inval_poc) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 5dffe928f256..8143ebd4fb72 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -134,7 +134,7 @@ static void update_nvhe_init_params(void) for (i = 0; i < hyp_nr_cpus; i++) { params = per_cpu_ptr(&kvm_init_params, i); params->pgd_pa = __hyp_pa(pkvm_pgtable.pgd); - __flush_dcache_area((unsigned long)params, + dcache_clean_inval_poc((unsigned long)params, (unsigned long)params + sizeof(*params)); } } diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 83dc3b271bc5..38ed0f6f2703 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -104,7 +104,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, * you should be running with VHE enabled. 
*/ if (icache_is_vpipt()) - __flush_icache_all(); + icache_inval_all_pou(); __tlb_switch_to_host(&cxt); } diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 10d2f04013d4..e9ad7fb28ee3 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -841,7 +841,7 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, if (need_flush) { kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops); - __flush_dcache_area((unsigned long)pte_follow, + dcache_clean_inval_poc((unsigned long)pte_follow, (unsigned long)pte_follow + kvm_granule_size(level)); } @@ -997,7 +997,7 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, return 0; pte_follow = kvm_pte_follow(pte, mm_ops); - __flush_dcache_area((unsigned long)pte_follow, + dcache_clean_inval_poc((unsigned long)pte_follow, (unsigned long)pte_follow + kvm_granule_size(level)); return 0; diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c index 62ea989effe8..baee22961bdb 100644 --- a/arch/arm64/lib/uaccess_flushcache.c +++ b/arch/arm64/lib/uaccess_flushcache.c @@ -15,7 +15,7 @@ void memcpy_flushcache(void *dst, const void *src, size_t cnt) * barrier to order the cache maintenance against the memcpy. */ memcpy(dst, src, cnt); - __clean_dcache_area_pop((unsigned long)dst, (unsigned long)dst + cnt); + dcache_clean_pop((unsigned long)dst, (unsigned long)dst + cnt); } EXPORT_SYMBOL_GPL(memcpy_flushcache); @@ -33,6 +33,6 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from, rc = raw_copy_from_user(to, from, n); /* See above */ - __clean_dcache_area_pop((unsigned long)to, (unsigned long)to + n - rc); + dcache_clean_pop((unsigned long)to, (unsigned long)to + n - rc); return rc; } diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index ea605d94182f..5051b3c1a4f1 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -15,7 +15,7 @@ #include /* - * __flush_cache_range(start,end) [fixup] + * caches_clean_inval_pou_macro(start,end) [fixup] * * Ensure that the I and D caches are coherent within specified region. * This is typically used when code has been written to a memory region, @@ -25,7 +25,7 @@ * - end - virtual end address of region * - fixup - optional label to branch to on user fault */ -.macro __flush_cache_range, fixup +.macro caches_clean_inval_pou_macro, fixup alternative_if ARM64_HAS_CACHE_IDC dsb ishst b .Ldc_skip_\@ @@ -43,7 +43,7 @@ alternative_else_nop_endif .endm /* - * __flush_icache_range(start,end) + * caches_clean_inval_pou(start,end) * * Ensure that the I and D caches are coherent within specified region. * This is typically used when code has been written to a memory region, @@ -52,13 +52,13 @@ alternative_else_nop_endif * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START(__flush_icache_range) - __flush_cache_range +SYM_FUNC_START(caches_clean_inval_pou) + caches_clean_inval_pou_macro ret -SYM_FUNC_END(__flush_icache_range) +SYM_FUNC_END(caches_clean_inval_pou) /* - * __flush_cache_user_range(start,end) + * caches_clean_inval_user_pou(start,end) * * Ensure that the I and D caches are coherent within specified region. 
* This is typically used when code has been written to a memory region, @@ -67,10 +67,10 @@ SYM_FUNC_END(__flush_icache_range) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START(__flush_cache_user_range) +SYM_FUNC_START(caches_clean_inval_user_pou) uaccess_ttbr0_enable x2, x3, x4 - __flush_cache_range 2f + caches_clean_inval_pou_macro 2f mov x0, xzr 1: uaccess_ttbr0_disable x1, x2 @@ -78,17 +78,17 @@ SYM_FUNC_START(__flush_cache_user_range) 2: mov x0, #-EFAULT b 1b -SYM_FUNC_END(__flush_cache_user_range) +SYM_FUNC_END(caches_clean_inval_user_pou) /* - * invalidate_icache_range(start,end) + * icache_inval_pou(start,end) * * Ensure that the I cache is invalid within specified region. * * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START(invalidate_icache_range) +SYM_FUNC_START(icache_inval_pou) alternative_if ARM64_HAS_CACHE_DIC isb ret @@ -96,10 +96,10 @@ alternative_else_nop_endif invalidate_icache_by_line x0, x1, x2, x3 ret -SYM_FUNC_END(invalidate_icache_range) +SYM_FUNC_END(icache_inval_pou) /* - * __flush_dcache_area(start, end) + * dcache_clean_inval_poc(start, end) * * Ensure that any D-cache lines for the interval [start, end) * are cleaned and invalidated to the PoC. @@ -107,13 +107,13 @@ SYM_FUNC_END(invalidate_icache_range) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(__flush_dcache_area) +SYM_FUNC_START_PI(dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__flush_dcache_area) +SYM_FUNC_END_PI(dcache_clean_inval_poc) /* - * __clean_dcache_area_pou(start, end) + * dcache_clean_pou(start, end) * * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoU. @@ -121,17 +121,17 @@ SYM_FUNC_END_PI(__flush_dcache_area) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START(__clean_dcache_area_pou) +SYM_FUNC_START(dcache_clean_pou) alternative_if ARM64_HAS_CACHE_IDC dsb ishst ret alternative_else_nop_endif dcache_by_line_op cvau, ish, x0, x1, x2, x3 ret -SYM_FUNC_END(__clean_dcache_area_pou) +SYM_FUNC_END(dcache_clean_pou) /* - * __inval_dcache_area(start, end) + * dcache_inval_poc(start, end) * * Ensure that any D-cache lines for the interval [start, end) * are invalidated. Any partial lines at the ends of the interval are @@ -141,7 +141,7 @@ SYM_FUNC_END(__clean_dcache_area_pou) * - end - kernel end address of region */ SYM_FUNC_START_LOCAL(__dma_inv_area) -SYM_FUNC_START_PI(__inval_dcache_area) +SYM_FUNC_START_PI(dcache_inval_poc) /* FALLTHROUGH */ /* @@ -166,11 +166,11 @@ SYM_FUNC_START_PI(__inval_dcache_area) b.lo 2b dsb sy ret -SYM_FUNC_END_PI(__inval_dcache_area) +SYM_FUNC_END_PI(dcache_inval_poc) SYM_FUNC_END(__dma_inv_area) /* - * __clean_dcache_area_poc(start, end) + * dcache_clean_poc(start, end) * * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoC. 
@@ -179,7 +179,7 @@ SYM_FUNC_END(__dma_inv_area) * - end - virtual end address of region */ SYM_FUNC_START_LOCAL(__dma_clean_area) -SYM_FUNC_START_PI(__clean_dcache_area_poc) +SYM_FUNC_START_PI(dcache_clean_poc) /* FALLTHROUGH */ /* @@ -189,11 +189,11 @@ SYM_FUNC_START_PI(__clean_dcache_area_poc) */ dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__clean_dcache_area_poc) +SYM_FUNC_END_PI(dcache_clean_poc) SYM_FUNC_END(__dma_clean_area) /* - * __clean_dcache_area_pop(start, end) + * dcache_clean_pop(start, end) * * Ensure that any D-cache lines for the interval [start, end) * are cleaned to the PoP. @@ -201,13 +201,13 @@ SYM_FUNC_END(__dma_clean_area) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(__clean_dcache_area_pop) +SYM_FUNC_START_PI(dcache_clean_pop) alternative_if_not ARM64_HAS_DCPOP - b __clean_dcache_area_poc + b dcache_clean_poc alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__clean_dcache_area_pop) +SYM_FUNC_END_PI(dcache_clean_pop) /* * __dma_flush_area(start, size) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index c4ca7e05fdb8..2aaf950b906c 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -17,14 +17,14 @@ void sync_icache_aliases(unsigned long start, unsigned long end) { if (icache_is_aliasing()) { - __clean_dcache_area_pou(start, end); - __flush_icache_all(); + dcache_clean_pou(start, end); + icache_inval_all_pou(); } else { /* * Don't issue kick_all_cpus_sync() after I-cache invalidation * for user mappings. */ - __flush_icache_range(start, end); + caches_clean_inval_pou(start, end); } } @@ -76,20 +76,20 @@ EXPORT_SYMBOL(flush_dcache_page); /* * Additional functions defined in assembly. */ -EXPORT_SYMBOL(__flush_icache_range); +EXPORT_SYMBOL(caches_clean_inval_pou); #ifdef CONFIG_ARCH_HAS_PMEM_API void arch_wb_cache_pmem(void *addr, size_t size) { /* Ensure order against any prior non-cacheable writes */ dmb(osh); - __clean_dcache_area_pop((unsigned long)addr, (unsigned long)addr + size); + dcache_clean_pop((unsigned long)addr, (unsigned long)addr + size); } EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); void arch_invalidate_pmem(void *addr, size_t size) { - __inval_dcache_area((unsigned long)addr, (unsigned long)addr + size); + dcache_inval_poc((unsigned long)addr, (unsigned long)addr + size); } EXPORT_SYMBOL_GPL(arch_invalidate_pmem); #endif -- cgit From 7adc7b225cddcfd0f346d10144fd7a3d3d9f9ea7 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Wed, 12 May 2021 09:20:50 +1200 Subject: powerpc/fsl: set fsl,i2c-erratum-a004447 flag for P2041 i2c controllers The i2c controllers on the P2040/P2041 have an erratum where the documented scheme for i2c bus recovery will not work (A-004447). A different mechanism is needed which is documented in the P2040 Chip Errata Rev Q (latest available at the time of writing). 
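For reference, a consumer of the new device-tree flag might detect it roughly as follows. This is a hedged sketch, not code from this series: only the "fsl,i2c-erratum-a004447" property string comes from the patch below, and the helper name is hypothetical.

#include <linux/of.h>

/* Sketch: report whether the A-004447 recovery workaround is needed */
static bool fsl_i2c_has_erratum_a004447(struct device_node *np)
{
	/* of_property_read_bool() is true iff the property is present */
	return of_property_read_bool(np, "fsl,i2c-erratum-a004447");
}

A bus driver testing this would then use the alternative recovery sequence from the chip errata document instead of the standard one.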
Signed-off-by: Chris Packham Acked-by: Michael Ellerman Signed-off-by: Wolfram Sang --- arch/powerpc/boot/dts/fsl/p2041si-post.dtsi | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi index 872e4485dc3f..ddc018d42252 100644 --- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi @@ -371,7 +371,23 @@ }; /include/ "qoriq-i2c-0.dtsi" + i2c@118000 { + fsl,i2c-erratum-a004447; + }; + + i2c@118100 { + fsl,i2c-erratum-a004447; + }; + /include/ "qoriq-i2c-1.dtsi" + i2c@119000 { + fsl,i2c-erratum-a004447; + }; + + i2c@119100 { + fsl,i2c-erratum-a004447; + }; + /include/ "qoriq-duart-0.dtsi" /include/ "qoriq-duart-1.dtsi" /include/ "qoriq-gpio-0.dtsi" -- cgit From 19ae697a1e4edf1d755b413e3aa38da65e2db23b Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Wed, 12 May 2021 09:20:51 +1200 Subject: powerpc/fsl: set fsl,i2c-erratum-a004447 flag for P1010 i2c controllers The i2c controllers on the P1010 have an erratum where the documented scheme for i2c bus recovery will not work (A-004447). A different mechanism is needed which is documented in the P1010 Chip Errata Rev L. Signed-off-by: Chris Packham Acked-by: Michael Ellerman Signed-off-by: Wolfram Sang --- arch/powerpc/boot/dts/fsl/p1010si-post.dtsi | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi index c2717f31925a..ccda0a91abf0 100644 --- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi @@ -122,7 +122,15 @@ }; /include/ "pq3-i2c-0.dtsi" + i2c@3000 { + fsl,i2c-erratum-a004447; + }; + /include/ "pq3-i2c-1.dtsi" + i2c@3100 { + fsl,i2c-erratum-a004447; + }; + /include/ "pq3-duart-0.dtsi" /include/ "pq3-espi-0.dtsi" spi0: spi@7000 { -- cgit From 0e5cb7770684b4c81bcc63f4675e488f9a0e31eb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 27 Feb 2021 10:23:45 +0000 Subject: irqchip/gic: Split vGIC probing information from the GIC code The vGIC advertising code is unsurprisingly very much tied to the GIC implementations. However, we are about to extend the support to lesser implementations. Let's dissociate the vgic registration from the GIC code and move it into KVM, where it makes a bit more sense. This also allows us to mark the gic_kvm_info structures as __initdata. 
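To illustrate the resulting flow: an irqchip driver fills in a gic_kvm_info (now __initdata) and passes it to vgic_set_kvm_info(), which immediately takes a private copy that KVM frees once kvm_vgic_hyp_init() has consumed it. A minimal sketch, assuming the gic_kvm_info layout added by this series; my_gic_kvm_info and my_gic_of_init() are invented names:

#include <linux/of_irq.h>
#include <linux/irqchip/arm-vgic-info.h>

static struct gic_kvm_info my_gic_kvm_info __initdata;

static int __init my_gic_of_init(struct device_node *node)
{
	my_gic_kvm_info.type = GIC_V2;
	my_gic_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);

	/* KVM copies the structure, so the __initdata original may vanish */
	vgic_set_kvm_info(&my_gic_kvm_info);
	return 0;
}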
Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 18 ++++++++++++--- drivers/irqchip/irq-gic-common.c | 13 ----------- drivers/irqchip/irq-gic-common.h | 2 -- drivers/irqchip/irq-gic-v3.c | 6 ++--- drivers/irqchip/irq-gic.c | 6 ++--- include/linux/irqchip/arm-gic-common.h | 25 +-------------------- include/linux/irqchip/arm-vgic-info.h | 41 ++++++++++++++++++++++++++++++++++ 7 files changed, 63 insertions(+), 48 deletions(-) create mode 100644 include/linux/irqchip/arm-vgic-info.h (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 58cbda00e56d..2fdb65529594 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -482,6 +482,16 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) return IRQ_HANDLED; } +static struct gic_kvm_info *gic_kvm_info; + +void __init vgic_set_kvm_info(const struct gic_kvm_info *info) +{ + BUG_ON(gic_kvm_info != NULL); + gic_kvm_info = kmalloc(sizeof(*info), GFP_KERNEL); + if (gic_kvm_info) + *gic_kvm_info = *info; +} + /** * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware * @@ -509,10 +519,8 @@ void kvm_vgic_init_cpu_hardware(void) */ int kvm_vgic_hyp_init(void) { - const struct gic_kvm_info *gic_kvm_info; int ret; - gic_kvm_info = gic_get_kvm_info(); if (!gic_kvm_info) return -ENODEV; @@ -536,10 +544,14 @@ int kvm_vgic_hyp_init(void) ret = -ENODEV; } + kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; + + kfree(gic_kvm_info); + gic_kvm_info = NULL; + if (ret) return ret; - kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index f47b41dfd023..a610821c8ff2 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -12,19 +12,6 @@ static DEFINE_RAW_SPINLOCK(irq_controller_lock); -static const struct gic_kvm_info *gic_kvm_info; - -const struct gic_kvm_info *gic_get_kvm_info(void) -{ - return gic_kvm_info; -} - -void gic_set_kvm_info(const struct gic_kvm_info *info) -{ - BUG_ON(gic_kvm_info != NULL); - gic_kvm_info = info; -} - void gic_enable_of_quirks(const struct device_node *np, const struct gic_quirk *quirks, void *data) { diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h index ccba8b0fe0f5..27e3d4ed4f32 100644 --- a/drivers/irqchip/irq-gic-common.h +++ b/drivers/irqchip/irq-gic-common.h @@ -28,6 +28,4 @@ void gic_enable_quirks(u32 iidr, const struct gic_quirk *quirks, void gic_enable_of_quirks(const struct device_node *np, const struct gic_quirk *quirks, void *data); -void gic_set_kvm_info(const struct gic_kvm_info *info); - #endif /* _IRQ_GIC_COMMON_H */ diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 37a23aa6de37..453fc425eede 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -103,7 +103,7 @@ EXPORT_SYMBOL(gic_nonsecure_priorities); /* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ static refcount_t *ppi_nmi_refs; -static struct gic_kvm_info gic_v3_kvm_info; +static struct gic_kvm_info gic_v3_kvm_info __initdata; static DEFINE_PER_CPU(bool, has_rss); #define MPIDR_RS(mpidr) (((mpidr) & 0xF0UL) >> 4) @@ -1852,7 +1852,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; 
gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid; - gic_set_kvm_info(&gic_v3_kvm_info); + vgic_set_kvm_info(&gic_v3_kvm_info); } static int __init gic_of_init(struct device_node *node, struct device_node *parent) @@ -2168,7 +2168,7 @@ static void __init gic_acpi_setup_kvm_info(void) gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid; - gic_set_kvm_info(&gic_v3_kvm_info); + vgic_set_kvm_info(&gic_v3_kvm_info); } static int __init diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index b1d9c22caf2e..2de9ec8ece0c 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -119,7 +119,7 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); static struct gic_chip_data gic_data[CONFIG_ARM_GIC_MAX_NR] __read_mostly; -static struct gic_kvm_info gic_v2_kvm_info; +static struct gic_kvm_info gic_v2_kvm_info __initdata; static DEFINE_PER_CPU(u32, sgi_intid); @@ -1451,7 +1451,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) return; if (static_branch_likely(&supports_deactivate_key)) - gic_set_kvm_info(&gic_v2_kvm_info); + vgic_set_kvm_info(&gic_v2_kvm_info); } int __init @@ -1618,7 +1618,7 @@ static void __init gic_acpi_setup_kvm_info(void) gic_v2_kvm_info.maint_irq = irq; - gic_set_kvm_info(&gic_v2_kvm_info); + vgic_set_kvm_info(&gic_v2_kvm_info); } static int __init gic_v2_acpi_init(union acpi_subtable_headers *header, diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h index fa8c0455c352..1177f3a1aed5 100644 --- a/include/linux/irqchip/arm-gic-common.h +++ b/include/linux/irqchip/arm-gic-common.h @@ -7,8 +7,7 @@ #ifndef __LINUX_IRQCHIP_ARM_GIC_COMMON_H #define __LINUX_IRQCHIP_ARM_GIC_COMMON_H -#include -#include +#include #define GICD_INT_DEF_PRI 0xa0 #define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ @@ -16,28 +15,6 @@ (GICD_INT_DEF_PRI << 8) |\ GICD_INT_DEF_PRI) -enum gic_type { - GIC_V2, - GIC_V3, -}; - -struct gic_kvm_info { - /* GIC type */ - enum gic_type type; - /* Virtual CPU interface */ - struct resource vcpu; - /* Interrupt number */ - unsigned int maint_irq; - /* Virtual control interface */ - struct resource vctrl; - /* vlpi support */ - bool has_v4; - /* rvpeid support */ - bool has_v4_1; -}; - -const struct gic_kvm_info *gic_get_kvm_info(void); - struct irq_domain; struct fwnode_handle; int gicv2m_init(struct fwnode_handle *parent_handle, diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h new file mode 100644 index 000000000000..a25d4da5697d --- /dev/null +++ b/include/linux/irqchip/arm-vgic-info.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * include/linux/irqchip/arm-vgic-info.h + * + * Copyright (C) 2016 ARM Limited, All Rights Reserved. 
+ */ +#ifndef __LINUX_IRQCHIP_ARM_VGIC_INFO_H +#define __LINUX_IRQCHIP_ARM_VGIC_INFO_H + +#include +#include + +enum gic_type { + /* Full GICv2 */ + GIC_V2, + /* Full GICv3, optionally with v2 compat */ + GIC_V3, +}; + +struct gic_kvm_info { + /* GIC type */ + enum gic_type type; + /* Virtual CPU interface */ + struct resource vcpu; + /* Interrupt number */ + unsigned int maint_irq; + /* Virtual control interface */ + struct resource vctrl; + /* vlpi support */ + bool has_v4; + /* rvpeid support */ + bool has_v4_1; +}; + +#ifdef CONFIG_KVM +void vgic_set_kvm_info(const struct gic_kvm_info *info); +#else +static inline void vgic_set_kvm_info(const struct gic_kvm_info *info) {} +#endif + +#endif -- cgit From 74501499d4e0d4ba59ab2bc6be1873716549169d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 19 Feb 2021 16:39:31 +0000 Subject: KVM: arm64: Handle physical FIQ as an IRQ while running a guest As we now entertain the possibility of FIQ being used on the host, treat the signalling of a FIQ while running a guest as an IRQ, causing an exit instead of a HYP panic. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/hyp-entry.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 5f49df4ffdd8..9aa9b73475c9 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -76,6 +76,7 @@ el1_trap: b __guest_exit el1_irq: +el1_fiq: get_vcpu_ptr x1, x0 mov x0, #ARM_EXCEPTION_IRQ b __guest_exit @@ -131,7 +132,6 @@ SYM_CODE_END(\label) invalid_vector el2t_error_invalid invalid_vector el2h_irq_invalid invalid_vector el2h_fiq_invalid - invalid_vector el1_fiq_invalid .ltorg @@ -179,12 +179,12 @@ SYM_CODE_START(__kvm_hyp_vector) valid_vect el1_sync // Synchronous 64-bit EL1 valid_vect el1_irq // IRQ 64-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 64-bit EL1 + valid_vect el1_fiq // FIQ 64-bit EL1 valid_vect el1_error // Error 64-bit EL1 valid_vect el1_sync // Synchronous 32-bit EL1 valid_vect el1_irq // IRQ 32-bit EL1 - invalid_vect el1_fiq_invalid // FIQ 32-bit EL1 + valid_vect el1_fiq // FIQ 32-bit EL1 valid_vect el1_error // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_vector) -- cgit From 669062d2a1aa36661b490683fe17810aa24a9cfb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 28 Feb 2021 11:09:59 +0000 Subject: KVM: arm64: vgic: Be tolerant to the lack of maintenance interrupt masking As it turns out, not all the interrupt controllers are able to expose a vGIC maintenance interrupt that can be independently enabled/disabled. And to be fair, it doesn't really matter as all we require is for the interrupt to kick us out of guest mode one way or another. To that effect, add gic_kvm_info.no_maint_irq_mask for an interrupt controller to advertise the lack of masking.
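Reusing the hypothetical my_gic_kvm_info from the sketch above, a controller whose maintenance interrupt cannot be masked on its own would simply advertise that fact before registering; kvm_vgic_hyp_init() then skips both the maint_irq sanity check and the request_percpu_irq() call, as the diff below shows:

	/* Sketch: opt out of maintenance interrupt masking */
	my_gic_kvm_info.no_maint_irq_mask = true;
	vgic_set_kvm_info(&my_gic_kvm_info);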
Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 8 +++++++- include/linux/irqchip/arm-vgic-info.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 2fdb65529594..6752d084934d 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -519,12 +519,15 @@ void kvm_vgic_init_cpu_hardware(void) */ int kvm_vgic_hyp_init(void) { + bool has_mask; int ret; if (!gic_kvm_info) return -ENODEV; - if (!gic_kvm_info->maint_irq) { + has_mask = !gic_kvm_info->no_maint_irq_mask; + + if (has_mask && !gic_kvm_info->maint_irq) { kvm_err("No vgic maintenance irq\n"); return -ENXIO; } @@ -552,6 +555,9 @@ int kvm_vgic_hyp_init(void) if (ret) return ret; + if (!has_mask) + return 0; + ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, vgic_maintenance_handler, "vgic", kvm_get_running_vcpus()); diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h index a25d4da5697d..7c0d08ebb82c 100644 --- a/include/linux/irqchip/arm-vgic-info.h +++ b/include/linux/irqchip/arm-vgic-info.h @@ -24,6 +24,8 @@ struct gic_kvm_info { struct resource vcpu; /* Interrupt number */ unsigned int maint_irq; + /* No interrupt mask, no need to use the above field */ + bool no_maint_irq_mask; /* Virtual control interface */ struct resource vctrl; /* vlpi support */ bool has_v4; /* rvpeid support */ bool has_v4_1; -- cgit From f6c3e24fb721dda247f6691c809d6e6c413f22c7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 21:56:47 +0000 Subject: KVM: arm64: vgic: Let an interrupt controller advertise lack of HW deactivation The vGIC, as architected by ARM, allows a virtual interrupt to trigger the deactivation of a physical interrupt. This allows the following interrupt to be delivered without requiring an exit. However, some implementations have chosen not to implement this, meaning that we will need some unsavoury workarounds to deal with this. On detecting such a case, taint the kernel and spit a nastygram. We'll deal with this in later patches. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 10 ++++++++++ include/kvm/arm_vgic.h | 3 +++ include/linux/irqchip/arm-vgic-info.h | 2 ++ 3 files changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 6752d084934d..340c51d87677 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -532,6 +532,16 @@ int kvm_vgic_hyp_init(void) return -ENXIO; } + /* + * If we get one of these oddball non-GICs, taint the kernel, + * as we have no idea of how they *really* behave.
+ */ + if (gic_kvm_info->no_hw_deactivation) { + kvm_info("Non-architectural vgic, tainting kernel\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + kvm_vgic_global_state.no_hw_deactivation = true; + } + switch (gic_kvm_info->type) { case GIC_V2: ret = vgic_v2_probe(gic_kvm_info); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ec621180ef09..e45b26e8d479 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -72,6 +72,9 @@ struct vgic_global { bool has_gicv4; bool has_gicv4_1; + /* Pseudo GICv3 from outer space */ + bool no_hw_deactivation; + /* GIC system register CPU interface */ struct static_key_false gicv3_cpuif; diff --git a/include/linux/irqchip/arm-vgic-info.h b/include/linux/irqchip/arm-vgic-info.h index 7c0d08ebb82c..a75b2c7de69d 100644 --- a/include/linux/irqchip/arm-vgic-info.h +++ b/include/linux/irqchip/arm-vgic-info.h @@ -32,6 +32,8 @@ struct gic_kvm_info { bool has_v4; /* rvpeid support */ bool has_v4_1; + /* Deactivation impared, subpar stuff */ + bool no_hw_deactivation; }; #ifdef CONFIG_KVM -- cgit From db75f1a33f82ad332b6e139c5960e01999969d2c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Mar 2021 17:39:39 +0000 Subject: KVM: arm64: vgic: move irq->get_input_level into an ops structure We already have the option to attach a callback to an interrupt to retrieve its pending state. As we are planning to expand this facility, move this callback into its own data structure. This will limit the size of individual interrupts as the ops structures can be shared across multiple interrupts. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 8 ++++++-- arch/arm64/kvm/vgic/vgic.c | 14 +++++++------- include/kvm/arm_vgic.h | 28 +++++++++++++++++----------- 3 files changed, 30 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 74e0699661e9..e2288b6bf435 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -1116,6 +1116,10 @@ bool kvm_arch_timer_get_input_level(int vintid) return kvm_timer_should_fire(timer); } +static struct irq_ops arch_timer_irq_ops = { + .get_input_level = kvm_arch_timer_get_input_level, +}; + int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); @@ -1143,7 +1147,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_phys_irq(vcpu, map.direct_vtimer->host_timer_irq, map.direct_vtimer->irq.irq, - kvm_arch_timer_get_input_level); + &arch_timer_irq_ops); if (ret) return ret; @@ -1151,7 +1155,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_phys_irq(vcpu, map.direct_ptimer->host_timer_irq, map.direct_ptimer->irq.irq, - kvm_arch_timer_get_input_level); + &arch_timer_irq_ops); } if (ret) diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 15b666200f0b..111bff47e471 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -182,8 +182,8 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq) BUG_ON(!irq->hw); - if (irq->get_input_level) - return irq->get_input_level(irq->intid); + if (irq->ops && irq->ops->get_input_level) + return irq->ops->get_input_level(irq->intid); WARN_ON(irq_get_irqchip_state(irq->host_irq, IRQCHIP_STATE_PENDING, @@ -480,7 +480,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, /* @irq->irq_lock must be held */ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, unsigned int host_irq, - bool (*get_input_level)(int vindid)) + 
struct irq_ops *ops) { struct irq_desc *desc; struct irq_data *data; @@ -500,7 +500,7 @@ static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, irq->hw = true; irq->host_irq = host_irq; irq->hwintid = data->hwirq; - irq->get_input_level = get_input_level; + irq->ops = ops; return 0; } @@ -509,11 +509,11 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) { irq->hw = false; irq->hwintid = 0; - irq->get_input_level = NULL; + irq->ops = NULL; } int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, - u32 vintid, bool (*get_input_level)(int vindid)) + u32 vintid, struct irq_ops *ops) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); unsigned long flags; @@ -522,7 +522,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, BUG_ON(!irq); raw_spin_lock_irqsave(&irq->irq_lock, flags); - ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level); + ret = kvm_vgic_map_irq(vcpu, irq, host_irq, ops); raw_spin_unlock_irqrestore(&irq->irq_lock, flags); vgic_put_irq(vcpu->kvm, irq); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index e45b26e8d479..e5f06df000f2 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -92,6 +92,21 @@ enum vgic_irq_config { VGIC_CONFIG_LEVEL }; +/* + * Per-irq ops overriding some common behavious. + * + * Always called in non-preemptible section and the functions can use + * kvm_arm_get_running_vcpu() to get the vcpu pointer for private IRQs. + */ +struct irq_ops { + /* + * Callback function pointer to in-kernel devices that can tell us the + * state of the input level of mapped level-triggered IRQ faster than + * peaking into the physical GIC. + */ + bool (*get_input_level)(int vintid); +}; + struct vgic_irq { raw_spinlock_t irq_lock; /* Protects the content of the struct */ struct list_head lpi_list; /* Used to link all LPIs together */ @@ -129,16 +144,7 @@ struct vgic_irq { u8 group; /* 0 == group 0, 1 == group 1 */ enum vgic_irq_config config; /* Level or edge */ - /* - * Callback function pointer to in-kernel devices that can tell us the - * state of the input level of mapped level-triggered IRQ faster than - * peaking into the physical GIC. - * - * Always called in non-preemptible section and the functions can use - * kvm_arm_get_running_vcpu() to get the vcpu pointer for private - * IRQs. - */ - bool (*get_input_level)(int vintid); + struct irq_ops *ops; void *owner; /* Opaque pointer to reserve an interrupt for in-kernel devices. */ -- cgit From 354920e79441c8a53ac73008b06d3b70ed06eb34 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 13:11:58 +0000 Subject: KVM: arm64: vgic: Implement SW-driven deactivation In order to deal with these systems that do not offer HW-based deactivation of interrupts, let's implement a SW-based approach: - When the irq is queued into a LR, treat it as a pure virtual interrupt and set the EOI flag in the LR.
- When the interrupt state is read back from the LR, force a deactivation when the state is invalid (neither active nor pending) Interrupts requiring such treatment get the VGIC_SW_RESAMPLE flag. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-v2.c | 19 +++++++++++++++---- arch/arm64/kvm/vgic/vgic-v3.c | 19 +++++++++++++++---- include/kvm/arm_vgic.h | 10 ++++++++++ 3 files changed, 40 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 11934c2af2f4..2c580204f1dc 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -108,11 +108,22 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) * If this causes us to lower the level, we have to also clear * the physical active state, since we will otherwise never be * told when the interrupt becomes asserted again. + * + * Another case is when the interrupt requires a helping hand + * on deactivation (no HW deactivation, for example). */ - if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) { - irq->line_level = vgic_get_phys_line_level(irq); + if (vgic_irq_is_mapped_level(irq)) { + bool resample = false; + + if (val & GICH_LR_PENDING_BIT) { + irq->line_level = vgic_get_phys_line_level(irq); + resample = !irq->line_level; + } else if (vgic_irq_needs_resampling(irq) && + !(irq->active || irq->pending_latch)) { + resample = true; + } - if (!irq->line_level) + if (resample) vgic_irq_set_phys_active(irq, false); } @@ -152,7 +163,7 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) if (irq->group) val |= GICH_LR_GROUP1; - if (irq->hw) { + if (irq->hw && !vgic_irq_needs_resampling(irq)) { val |= GICH_LR_HW; val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; /* diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 41ecf219c333..66004f61cd83 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -101,11 +101,22 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) * If this causes us to lower the level, we have to also clear * the physical active state, since we will otherwise never be * told when the interrupt becomes asserted again. + * + * Another case is when the interrupt requires a helping hand + * on deactivation (no HW deactivation, for example). */ - if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) { - irq->line_level = vgic_get_phys_line_level(irq); + if (vgic_irq_is_mapped_level(irq)) { + bool resample = false; + + if (val & ICH_LR_PENDING_BIT) { + irq->line_level = vgic_get_phys_line_level(irq); + resample = !irq->line_level; + } else if (vgic_irq_needs_resampling(irq) && + !(irq->active || irq->pending_latch)) { + resample = true; + } - if (!irq->line_level) + if (resample) vgic_irq_set_phys_active(irq, false); } @@ -136,7 +147,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) } } - if (irq->hw) { + if (irq->hw && !vgic_irq_needs_resampling(irq)) { val |= ICH_LR_HW; val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; /* diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index e5f06df000f2..e602d848fc1a 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -99,6 +99,11 @@ enum vgic_irq_config { * kvm_arm_get_running_vcpu() to get the vcpu pointer for private IRQs. 
*/ struct irq_ops { + /* Per interrupt flags for special-cased interrupts */ + unsigned long flags; + +#define VGIC_IRQ_SW_RESAMPLE BIT(0) /* Clear the active state for resampling */ + /* * Callback function pointer to in-kernel devices that can tell us the * state of the input level of mapped level-triggered IRQ faster than @@ -150,6 +155,11 @@ struct vgic_irq { for in-kernel devices. */ }; +static inline bool vgic_irq_needs_resampling(struct vgic_irq *irq) +{ + return irq->ops && (irq->ops->flags & VGIC_IRQ_SW_RESAMPLE); +} + struct vgic_register_region; struct vgic_its; -- cgit From 2f2f7e39dbb31aa1db13c490a4e47502497510fe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 14:05:21 +0000 Subject: KVM: arm64: timer: Refactor IRQ configuration As we are about to add some more things to the timer IRQ configuration, move this code out of the main timer init code into its own set of functions. No functional changes. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 57 ++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index e2288b6bf435..3cd170388d88 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -973,6 +973,35 @@ static int kvm_timer_dying_cpu(unsigned int cpu) return 0; } +static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) +{ + *flags = irq_get_trigger_type(virq); + if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) { + kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n", + virq); + *flags = IRQF_TRIGGER_LOW; + } +} + +static int kvm_irq_init(struct arch_timer_kvm_info *info) +{ + if (info->virtual_irq <= 0) { + kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", + info->virtual_irq); + return -ENODEV; + } + + host_vtimer_irq = info->virtual_irq; + kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags); + + if (info->physical_irq > 0) { + host_ptimer_irq = info->physical_irq; + kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags); + } + + return 0; +} + int kvm_timer_hyp_init(bool has_gic) { struct arch_timer_kvm_info *info; @@ -986,22 +1015,11 @@ int kvm_timer_hyp_init(bool has_gic) return -ENODEV; } - /* First, do the virtual EL1 timer irq */ - - if (info->virtual_irq <= 0) { - kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", - info->virtual_irq); - return -ENODEV; - } - host_vtimer_irq = info->virtual_irq; + err = kvm_irq_init(info); + if (err) + return err; - host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); - if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && - host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { - kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n", - host_vtimer_irq); - host_vtimer_irq_flags = IRQF_TRIGGER_LOW; - } + /* First, do the virtual EL1 timer irq */ err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, "kvm guest vtimer", kvm_get_running_vcpus()); @@ -1027,15 +1045,6 @@ int kvm_timer_hyp_init(bool has_gic) /* Now let's do the physical EL1 timer irq */ if (info->physical_irq > 0) { - host_ptimer_irq = info->physical_irq; - host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq); - if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH && - host_ptimer_irq_flags != IRQF_TRIGGER_LOW) { - kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n", - host_ptimer_irq); - host_ptimer_irq_flags = IRQF_TRIGGER_LOW; - } - err = request_percpu_irq(host_ptimer_irq, 
kvm_arch_timer_handler, "kvm guest ptimer", kvm_get_running_vcpus()); if (err) { -- cgit From 5f59229680f70078ac4c11db2ae89be087474144 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Mar 2021 14:21:21 +0000 Subject: KVM: arm64: timer: Add support for SW-based deactivation In order to deal with the lack of active state, we need to use the mask/unmask primitives (after all, the active state is just an additional mask on top of the normal one). To avoid adding a bunch of ugly conditionals in the timer and vgic code, let's use a timer-specific irqdomain to deal with the state conversion. Yes, this is an unexpected use of irqdomains, but there is no reason not to be just as creative as the designers of the HW... This involves overloading the vcpu_affinity, set_irqchip_state and eoi callbacks so that the rest of the KVM code can continue ignoring the oddities of the underlying platform. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arch_timer.c | 105 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 101 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 3cd170388d88..3df67c127489 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -973,6 +974,77 @@ static int kvm_timer_dying_cpu(unsigned int cpu) return 0; } +static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) +{ + if (vcpu) + irqd_set_forwarded_to_vcpu(d); + else + irqd_clr_forwarded_to_vcpu(d); + + return 0; +} + +static int timer_irq_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, bool val) +{ + if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d)) + return irq_chip_set_parent_state(d, which, val); + + if (val) + irq_chip_mask_parent(d); + else + irq_chip_unmask_parent(d); + + return 0; +} + +static void timer_irq_eoi(struct irq_data *d) +{ + if (!irqd_is_forwarded_to_vcpu(d)) + irq_chip_eoi_parent(d); +} + +static void timer_irq_ack(struct irq_data *d) +{ + d = d->parent_data; + if (d->chip->irq_ack) + d->chip->irq_ack(d); +} + +static struct irq_chip timer_chip = { + .name = "KVM", + .irq_ack = timer_irq_ack, + .irq_mask = irq_chip_mask_parent, + .irq_unmask = irq_chip_unmask_parent, + .irq_eoi = timer_irq_eoi, + .irq_set_type = irq_chip_set_type_parent, + .irq_set_vcpu_affinity = timer_irq_set_vcpu_affinity, + .irq_set_irqchip_state = timer_irq_set_irqchip_state, +}; + +static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + irq_hw_number_t hwirq = (uintptr_t)arg; + + return irq_domain_set_hwirq_and_chip(domain, virq, hwirq, + &timer_chip, NULL); +} + +static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ +} + +static const struct irq_domain_ops timer_domain_ops = { + .alloc = timer_irq_domain_alloc, + .free = timer_irq_domain_free, +}; + +static struct irq_ops arch_timer_irq_ops = { + .get_input_level = kvm_arch_timer_get_input_level, +}; + static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) { *flags = irq_get_trigger_type(virq); @@ -985,6 +1057,8 @@ static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags) static int kvm_irq_init(struct arch_timer_kvm_info *info) { + struct irq_domain *domain = NULL; + if (info->virtual_irq <= 0) { kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", info->virtual_irq); @@ -994,9 +1068,36 @@ static int kvm_irq_init(struct 
arch_timer_kvm_info *info) host_vtimer_irq = info->virtual_irq; kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags); + if (kvm_vgic_global_state.no_hw_deactivation) { + struct fwnode_handle *fwnode; + struct irq_data *data; + + fwnode = irq_domain_alloc_named_fwnode("kvm-timer"); + if (!fwnode) + return -ENOMEM; + + /* Assume both vtimer and ptimer in the same parent */ + data = irq_get_irq_data(host_vtimer_irq); + domain = irq_domain_create_hierarchy(data->domain, 0, + NR_KVM_TIMERS, fwnode, + &timer_domain_ops, NULL); + if (!domain) { + irq_domain_free_fwnode(fwnode); + return -ENOMEM; + } + + arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE; + WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq, + (void *)TIMER_VTIMER)); + } + if (info->physical_irq > 0) { host_ptimer_irq = info->physical_irq; kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags); + + if (domain) + WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq, + (void *)TIMER_PTIMER)); } return 0; @@ -1125,10 +1226,6 @@ bool kvm_arch_timer_get_input_level(int vintid) return kvm_timer_should_fire(timer); } -static struct irq_ops arch_timer_irq_ops = { - .get_input_level = kvm_arch_timer_get_input_level, -}; - int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu_timer(vcpu); -- cgit From fd6f17bade2147b31198ad00b22d3acf5a398aec Mon Sep 17 00:00:00 2001 From: Keqian Zhu Date: Fri, 7 May 2021 19:03:21 +0800 Subject: KVM: arm64: Remove the creation time's mapping of MMIO regions The MMIO regions may be unmapped for many reasons and can be remapped by the stage2 fault path. Mapping MMIO regions at creation time is a minor optimization and makes these two mapping paths hard to keep in sync. Remove the mapping code while keeping the useful sanity check. Signed-off-by: Keqian Zhu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210507110322.23348-2-zhukeqian1@huawei.com --- arch/arm64/kvm/mmu.c | 38 +++----------------------------------- 1 file changed, 3 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c10207fed2f3..e982178c8c72 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1346,7 +1346,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, { hva_t hva = mem->userspace_addr; hva_t reg_end = hva + mem->memory_size; - bool writable = !(mem->flags & KVM_MEM_READONLY); int ret = 0; if (change != KVM_MR_CREATE && change != KVM_MR_MOVE && @@ -1363,8 +1362,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, mmap_read_lock(current->mm); /* * A memory region could potentially cover multiple VMAs, and any holes - * between them, so iterate over all of them to find out if we can map - * any of them right now. + * between them, so iterate over all of them.
* * +--------------------------------------------+ * +---------------+----------------+ +----------------+ * | : VMA 1 | VMA 2 | | VMA 3 : | * +---------------+----------------+ +----------------+ * | memory region | * +--------------------------------------------+ */ do { struct vm_area_struct *vma; - hva_t vm_start, vm_end; vma = find_vma_intersection(current->mm, hva, reg_end); if (!vma) break; - /* - * Take the intersection of this VMA with the memory region - */ - vm_start = max(hva, vma->vm_start); - vm_end = min(reg_end, vma->vm_end); - if (vma->vm_flags & VM_PFNMAP) { - gpa_t gpa = mem->guest_phys_addr + - (vm_start - mem->userspace_addr); - phys_addr_t pa; - - pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; - pa += vm_start - vma->vm_start; - /* IO region dirty page logging not allowed */ if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) { ret = -EINVAL; - goto out; - } - - ret = kvm_phys_addr_ioremap(kvm, gpa, pa, - vm_end - vm_start, - writable); - if (ret) break; + } } - hva = vm_end; + hva = min(reg_end, vma->vm_end); } while (hva < reg_end); - if (change == KVM_MR_FLAGS_ONLY) - goto out; - - spin_lock(&kvm->mmu_lock); - if (ret) - unmap_stage2_range(&kvm->arch.mmu, mem->guest_phys_addr, mem->memory_size); - else if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) - stage2_flush_memslot(kvm, memslot); - spin_unlock(&kvm->mmu_lock); -out: mmap_read_unlock(current->mm); return ret; } -- cgit From 2aa53d68cee6603931f73b28ef6b51ff3fde9397 Mon Sep 17 00:00:00 2001 From: Keqian Zhu Date: Fri, 7 May 2021 19:03:22 +0800 Subject: KVM: arm64: Try stage2 block mapping for host device MMIO The MMIO region of a device may be huge (GB level), so try to use block mapping in stage2 to speed up both map and unmap. Compared to normal memory mapping, we should consider two more points when trying block mapping for an MMIO region: 1. For normal memory mapping, the PA (host physical address) and HVA have the same alignment within PUD_SIZE or PMD_SIZE when we use the HVA to request a hugepage, so we don't need to consider PA alignment when verifying block mapping. But for device memory mapping, the PA and HVA may have different alignment. 2. For normal memory mapping, we are sure the hugepage size properly fits into the vma, so we don't check whether the mapping size exceeds the boundary of the vma. But for device memory mapping, we should pay attention to this. This adds get_vma_page_shift() to get the page shift for both normal memory and device MMIO regions, and checks these two points when selecting the block mapping size for an MMIO region.
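The two points above can be made concrete with a small sketch that mirrors the logic get_vma_page_shift() applies to PFNMAP VMAs in the diff below; can_use_pmd_block() is an illustrative helper, not the exact in-tree code:

#include <linux/mm.h>

static bool can_use_pmd_block(unsigned long hva, unsigned long pa,
			      struct vm_area_struct *vma)
{
	/* Point 1: HVA and PA must share the same offset within a block */
	if ((hva & (PMD_SIZE - 1)) != (pa & (PMD_SIZE - 1)))
		return false;

	/* Point 2: the aligned block must lie entirely inside the VMA */
	return ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start &&
	       ALIGN(hva, PMD_SIZE) <= vma->vm_end;
}

For example, with 2MiB PMD blocks, hva 0x8000201000 and pa 0x2000201000 both sit at offset 0x1000 within their blocks, so a PMD mapping is possible as long as the VMA covers the surrounding 2MiB of the HVA range.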
Signed-off-by: Keqian Zhu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210507110322.23348-3-zhukeqian1@huawei.com --- arch/arm64/kvm/mmu.c | 61 +++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index e982178c8c72..5742ba765ff9 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -822,6 +822,35 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot, return PAGE_SIZE; } +static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva) +{ + unsigned long pa; + + if (is_vm_hugetlb_page(vma) && !(vma->vm_flags & VM_PFNMAP)) + return huge_page_shift(hstate_vma(vma)); + + if (!(vma->vm_flags & VM_PFNMAP)) + return PAGE_SHIFT; + + VM_BUG_ON(is_vm_hugetlb_page(vma)); + + pa = (vma->vm_pgoff << PAGE_SHIFT) + (hva - vma->vm_start); + +#ifndef __PAGETABLE_PMD_FOLDED + if ((hva & (PUD_SIZE - 1)) == (pa & (PUD_SIZE - 1)) && + ALIGN_DOWN(hva, PUD_SIZE) >= vma->vm_start && + ALIGN(hva, PUD_SIZE) <= vma->vm_end) + return PUD_SHIFT; +#endif + + if ((hva & (PMD_SIZE - 1)) == (pa & (PMD_SIZE - 1)) && + ALIGN_DOWN(hva, PMD_SIZE) >= vma->vm_start && + ALIGN(hva, PMD_SIZE) <= vma->vm_end) + return PMD_SHIFT; + + return PAGE_SHIFT; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -853,7 +882,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - /* Let's check if we will get back a huge page backed by hugetlbfs */ + /* + * Let's check if we will get back a huge page backed by hugetlbfs, or + * get block mapping for device MMIO region. + */ mmap_read_lock(current->mm); vma = find_vma_intersection(current->mm, hva, hva + 1); if (unlikely(!vma)) { @@ -862,15 +894,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - if (is_vm_hugetlb_page(vma)) - vma_shift = huge_page_shift(hstate_vma(vma)); - else - vma_shift = PAGE_SHIFT; - - if (logging_active || - (vma->vm_flags & VM_PFNMAP)) { + /* + * logging_active is guaranteed to never be true for VM_PFNMAP + * memslots. + */ + if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; + } else { + vma_shift = get_vma_page_shift(vma, hva); } switch (vma_shift) { @@ -943,8 +975,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; if (kvm_is_device_pfn(pfn)) { + /* + * If the page was identified as device early by looking at + * the VMA flags, vma_pagesize is already representing the + * largest quantity we can map. If instead it was mapped + * via gfn_to_pfn_prot(), vma_pagesize is set to PAGE_SIZE + * and must not be upgraded. + * + * In both cases, we don't let transparent_hugepage_adjust() + * change things at the last minute. + */ device = true; - force_pte = true; } else if (logging_active && !write_fault) { /* * Only actually map the page as writable if this was a write @@ -965,7 +1006,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * If we are not forced to use page mapping, check if we are * backed by a THP and thus use block mapping if possible. 
*/ - if (vma_pagesize == PAGE_SIZE && !force_pte) + if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) vma_pagesize = transparent_hugepage_adjust(memslot, hva, &pfn, &fault_ipa); if (writable) -- cgit From 6cbf874e51b68e5b2eb0cc50be3676f5d5601dab Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:12 +0000 Subject: KVM: arm64: Move hyp_pool locking out of refcount helpers The hyp_page refcount helpers currently rely on the hyp_pool lock for serialization. However, this means the refcounts can't be changed from the buddy allocator core as it already holds the lock, which means pages have to go through odd transient states. For example, when a page is freed, its refcount is set to 0, and the lock is transiently released before the page can be attached to a free list in the buddy tree. This is currently harmless as the allocator checks the list node of each page to see if it is available for allocation or not, but it means the page refcount can't be trusted to represent the state of the page even if the pool lock is held. In order to fix this, remove the pool locking from the refcount helpers, and move all the logic to the buddy allocator. This will simplify the removal of the list node from struct hyp_page in a later patch. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-2-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/gfp.h | 35 ---------------------------- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 43 ++++++++++++++++++++++++++--------- 2 files changed, 32 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index 18a4494337bd..f2c84e4fa40f 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -22,41 +22,6 @@ struct hyp_pool { unsigned int max_order; }; -static inline void hyp_page_ref_inc(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - p->refcount++; - hyp_spin_unlock(&pool->lock); -} - -static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - int ret; - - hyp_spin_lock(&pool->lock); - p->refcount--; - ret = (p->refcount == 0); - hyp_spin_unlock(&pool->lock); - - return ret; -} - -static inline void hyp_set_page_refcounted(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - if (p->refcount) { - hyp_spin_unlock(&pool->lock); - BUG(); - } - p->refcount = 1; - hyp_spin_unlock(&pool->lock); -} - /* Allocation */ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order); void hyp_get_page(void *addr); diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 237e03bf0cb1..d666f4789e31 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -93,15 +93,6 @@ static void __hyp_attach_page(struct hyp_pool *pool, list_add_tail(&p->node, &pool->free_area[order]); } -static void hyp_attach_page(struct hyp_page *p) -{ - struct hyp_pool *pool = hyp_page_to_pool(p); - - hyp_spin_lock(&pool->lock); - __hyp_attach_page(pool, p); - hyp_spin_unlock(&pool->lock); -} - static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, struct hyp_page *p, unsigned int order) @@ -125,19 +116,49 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, return p; } +static inline void hyp_page_ref_inc(struct hyp_page *p) +{ + 
p->refcount++; +} + +static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +{ + p->refcount--; + return (p->refcount == 0); +} + +static inline void hyp_set_page_refcounted(struct hyp_page *p) +{ + BUG_ON(p->refcount); + p->refcount = 1; +} + +/* + * Changes to the buddy tree and page refcounts must be done with the hyp_pool + * lock held. If a refcount change requires an update to the buddy tree (e.g. + * hyp_put_page()), both operations must be done within the same critical + * section to guarantee transient states (e.g. a page with null refcount but + * not yet attached to a free list) can't be observed by well-behaved readers. + */ void hyp_put_page(void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); + struct hyp_pool *pool = hyp_page_to_pool(p); + hyp_spin_lock(&pool->lock); if (hyp_page_ref_dec_and_test(p)) - hyp_attach_page(p); + __hyp_attach_page(pool, p); + hyp_spin_unlock(&pool->lock); } void hyp_get_page(void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); + struct hyp_pool *pool = hyp_page_to_pool(p); + hyp_spin_lock(&pool->lock); hyp_page_ref_inc(p); + hyp_spin_unlock(&pool->lock); } void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) @@ -159,8 +180,8 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) p = list_first_entry(&pool->free_area[i], struct hyp_page, node); p = __hyp_extract_page(pool, p, order); - hyp_spin_unlock(&pool->lock); hyp_set_page_refcounted(p); + hyp_spin_unlock(&pool->lock); return hyp_page_to_virt(p); } -- cgit From 581982decc635c93934aaeb88d62c21238c63f11 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:13 +0000 Subject: KVM: arm64: Use refcount at hyp to check page availability The hyp buddy allocator currently checks the struct hyp_page list node to see if a page is available for allocation or not when trying to coalesce memory. Now that decrementing the refcount and attaching to the buddy tree is done in the same critical section, we can rely on the refcount of the buddy page to be in sync, which allows us to replace the list node check with a refcount check. This will ease removing the list node from struct hyp_page later on. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-3-qperret@google.com --- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index d666f4789e31..2602577daa00 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -55,7 +55,7 @@ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, { struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order); - if (!buddy || buddy->order != order || list_empty(&buddy->node)) + if (!buddy || buddy->order != order || buddy->refcount) return NULL; return buddy; @@ -133,6 +133,12 @@ static inline void hyp_set_page_refcounted(struct hyp_page *p) p->refcount = 1; } +static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p) +{ + if (hyp_page_ref_dec_and_test(p)) + __hyp_attach_page(pool, p); +} + /* * Changes to the buddy tree and page refcounts must be done with the hyp_pool * lock held. If a refcount change requires an update to the buddy tree (e.g.
@@ -146,8 +152,7 @@ void hyp_put_page(void *addr) struct hyp_pool *pool = hyp_page_to_pool(p); hyp_spin_lock(&pool->lock); - if (hyp_page_ref_dec_and_test(p)) - __hyp_attach_page(pool, p); + __hyp_put_page(pool, p); hyp_spin_unlock(&pool->lock); } @@ -202,15 +207,16 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, /* Init the vmemmap portion */ p = hyp_phys_to_page(phys); - memset(p, 0, sizeof(*p) * nr_pages); for (i = 0; i < nr_pages; i++) { p[i].pool = pool; + p[i].order = 0; INIT_LIST_HEAD(&p[i].node); + hyp_set_page_refcounted(&p[i]); } /* Attach the unused pages to the buddy tree */ for (i = reserved_pages; i < nr_pages; i++) - __hyp_attach_page(pool, &p[i]); + __hyp_put_page(pool, &p[i]); return 0; } -- cgit From 914cde58a03cc5eef858db34687433e17d0e44be Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:14 +0000 Subject: KVM: arm64: Remove list_head from hyp_page The list_head member of struct hyp_page is only needed when the page is attached to a free-list, which by definition implies the page is free. As such, nothing prevents us from using the page itself to store the list_head, hence reducing the size of the vmemmap. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-4-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/memory.h | 1 - arch/arm64/kvm/hyp/nvhe/page_alloc.c | 39 +++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index fd78bde939ee..7691ab495eb4 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -12,7 +12,6 @@ struct hyp_page { unsigned int refcount; unsigned int order; struct hyp_pool *pool; - struct list_head node; }; extern u64 __hyp_vmemmap; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 2602577daa00..34f0eb026dd2 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -62,6 +62,34 @@ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, } +/* + * Pages that are available for allocation are tracked in free-lists, so we use + * the pages themselves to store the list nodes to avoid wasting space. As the + * allocator always returns zeroed pages (which are zeroed on the hyp_put_page() + * path to optimize allocation speed), we also need to clean-up the list node in + * each page when we take it out of the list. 
+ */ +static inline void page_remove_from_list(struct hyp_page *p) +{ + struct list_head *node = hyp_page_to_virt(p); + + __list_del_entry(node); + memset(node, 0, sizeof(*node)); +} + +static inline void page_add_to_list(struct hyp_page *p, struct list_head *head) +{ + struct list_head *node = hyp_page_to_virt(p); + + INIT_LIST_HEAD(node); + list_add_tail(node, head); +} + +static inline struct hyp_page *node_to_page(struct list_head *node) +{ + return hyp_virt_to_page(node); +} + static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { @@ -83,14 +111,14 @@ static void __hyp_attach_page(struct hyp_pool *pool, break; /* Take the buddy out of its list, and coalesce with @p */ - list_del_init(&buddy->node); + page_remove_from_list(buddy); buddy->order = HYP_NO_ORDER; p = min(p, buddy); } /* Mark the new head, and insert it */ p->order = order; - list_add_tail(&p->node, &pool->free_area[order]); + page_add_to_list(p, &pool->free_area[order]); } static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, @@ -99,7 +127,7 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, { struct hyp_page *buddy; - list_del_init(&p->node); + page_remove_from_list(p); while (p->order > order) { /* * The buddy of order n - 1 currently has HYP_NO_ORDER as it @@ -110,7 +138,7 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, p->order--; buddy = __find_buddy_nocheck(pool, p, p->order); buddy->order = p->order; - list_add_tail(&buddy->node, &pool->free_area[buddy->order]); + page_add_to_list(buddy, &pool->free_area[buddy->order]); } return p; @@ -182,7 +210,7 @@ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) } /* Extract it from the tree at the right order */ - p = list_first_entry(&pool->free_area[i], struct hyp_page, node); + p = node_to_page(pool->free_area[i].next); p = __hyp_extract_page(pool, p, order); hyp_set_page_refcounted(p); @@ -210,7 +238,6 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, for (i = 0; i < nr_pages; i++) { p[i].pool = pool; p[i].order = 0; - INIT_LIST_HEAD(&p[i].node); hyp_set_page_refcounted(&p[i]); } -- cgit From 7c350ea39e53ade33ca7be00b0947f2b9f53dda0 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:15 +0000 Subject: KVM: arm64: Unify MMIO and mem host stage-2 pools We currently maintain two separate memory pools for the host stage-2, one for pages used in the page-table when mapping memory regions, and the other to map MMIO regions. The former is large enough to map all of memory with page granularity and the latter can cover an arbitrary portion of IPA space, but allows pages to be 'recycled'. However, this split makes accounting difficult to manage as pages at intermediate levels of the page-table may be used to map both memory and MMIO regions. Simplify the scheme by merging both pools into one. This means we can now hit the -ENOMEM case in the memory abort path, but we're still guaranteed forward-progress in the worst case by unmapping MMIO regions. On the plus side this also means we can usually map a lot more MMIO space at once if memory ranges happen to be mapped with block mappings.
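To make the new control flow easier to follow, here is a condensed sketch of the memory abort path after the merge (host_stage2_idmap_sketch() is a made-up name for illustration; __host_stage2_idmap() and host_stage2_unmap_dev_all() are the helpers used in the diff below):

    static int host_stage2_idmap_sketch(u64 start, u64 end,
                                        enum kvm_pgtable_prot prot)
    {
        int ret = __host_stage2_idmap(start, end, prot);

        if (ret != -ENOMEM)
            return ret;

        /* Out of pages: recycle the MMIO mappings, then retry once. */
        ret = host_stage2_unmap_dev_all();
        if (ret)
            return ret;

        return __host_stage2_idmap(start, end, prot);
    }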
Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-5-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 2 +- arch/arm64/kvm/hyp/include/nvhe/mm.h | 13 ++++---- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 46 +++++++++++---------------- arch/arm64/kvm/hyp/nvhe/setup.c | 16 +++------- arch/arm64/kvm/hyp/reserved_mem.c | 3 +- 5 files changed, 32 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 42d81ec739fa..9c227d87c36d 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -23,7 +23,7 @@ extern struct host_kvm host_kvm; int __pkvm_prot_finalize(void); int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end); -int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool); +int kvm_host_prepare_stage2(void *pgt_pool_base); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); static __always_inline void __load_host_stage2(void) diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 0095f6289742..8ec3a5a7744b 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -78,19 +78,20 @@ static inline unsigned long hyp_s1_pgtable_pages(void) return res; } -static inline unsigned long host_s2_mem_pgtable_pages(void) +static inline unsigned long host_s2_pgtable_pages(void) { + unsigned long res; + /* * Include an extra 16 pages to safely upper-bound the worst case of * concatenated pgds. */ - return __hyp_pgtable_total_pages() + 16; -} + res = __hyp_pgtable_total_pages() + 16; -static inline unsigned long host_s2_dev_pgtable_pages(void) -{ /* Allow 1 GiB for MMIO mappings */ - return __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + + return res; } #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 4b60c0056c04..c8ed7e86231b 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -23,8 +23,7 @@ extern unsigned long hyp_nr_cpus; struct host_kvm host_kvm; -static struct hyp_pool host_s2_mem; -static struct hyp_pool host_s2_dev; +static struct hyp_pool host_s2_pool; /* * Copies of the host's CPU features registers holding sanitized values. 
@@ -36,7 +35,7 @@ static const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) { - return hyp_alloc_pages(&host_s2_mem, get_order(size)); + return hyp_alloc_pages(&host_s2_pool, get_order(size)); } static void *host_s2_zalloc_page(void *pool) @@ -44,20 +43,14 @@ static void *host_s2_zalloc_page(void *pool) return hyp_alloc_pages(pool, 0); } -static int prepare_s2_pools(void *mem_pgt_pool, void *dev_pgt_pool) +static int prepare_s2_pool(void *pgt_pool_base) { unsigned long nr_pages, pfn; int ret; - pfn = hyp_virt_to_pfn(mem_pgt_pool); - nr_pages = host_s2_mem_pgtable_pages(); - ret = hyp_pool_init(&host_s2_mem, pfn, nr_pages, 0); - if (ret) - return ret; - - pfn = hyp_virt_to_pfn(dev_pgt_pool); - nr_pages = host_s2_dev_pgtable_pages(); - ret = hyp_pool_init(&host_s2_dev, pfn, nr_pages, 0); + pfn = hyp_virt_to_pfn(pgt_pool_base); + nr_pages = host_s2_pgtable_pages(); + ret = hyp_pool_init(&host_s2_pool, pfn, nr_pages, 0); if (ret) return ret; @@ -86,7 +79,7 @@ static void prepare_host_vtcr(void) id_aa64mmfr1_el1_sys_val, phys_shift); } -int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool) +int kvm_host_prepare_stage2(void *pgt_pool_base) { struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; int ret; @@ -94,7 +87,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool) prepare_host_vtcr(); hyp_spin_lock_init(&host_kvm.lock); - ret = prepare_s2_pools(mem_pgt_pool, dev_pgt_pool); + ret = prepare_s2_pool(pgt_pool_base); if (ret) return ret; @@ -199,11 +192,10 @@ static bool range_is_memory(u64 start, u64 end) } static inline int __host_stage2_idmap(u64 start, u64 end, - enum kvm_pgtable_prot prot, - struct hyp_pool *pool) + enum kvm_pgtable_prot prot) { return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, - prot, pool); + prot, &host_s2_pool); } static int host_stage2_idmap(u64 addr) @@ -211,7 +203,6 @@ static int host_stage2_idmap(u64 addr) enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W; struct kvm_mem_range range; bool is_memory = find_mem_range(addr, &range); - struct hyp_pool *pool = is_memory ? &host_s2_mem : &host_s2_dev; int ret; if (is_memory) @@ -222,22 +213,21 @@ static int host_stage2_idmap(u64 addr) if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot, pool); - if (is_memory || ret != -ENOMEM) + ret = __host_stage2_idmap(range.start, range.end, prot); + if (ret != -ENOMEM) goto unlock; /* - * host_s2_mem has been provided with enough pages to cover all of - * memory with page granularity, so we should never hit the ENOMEM case. - * However, it is difficult to know how much of the MMIO range we will - * need to cover upfront, so we may need to 'recycle' the pages if we - * run out. + * The pool has been provided with enough pages to cover all of memory + * with page granularity, but it is difficult to know how much of the + * MMIO range we will need to cover upfront, so we may need to 'recycle' + * the pages if we run out. */ ret = host_stage2_unmap_dev_all(); if (ret) goto unlock; - ret = __host_stage2_idmap(range.start, range.end, prot, pool); + ret = __host_stage2_idmap(range.start, range.end, prot); unlock: hyp_spin_unlock(&host_kvm.lock); @@ -258,7 +248,7 @@ int __pkvm_mark_hyp(phys_addr_t start, phys_addr_t end) hyp_spin_lock(&host_kvm.lock); ret = kvm_pgtable_stage2_set_owner(&host_kvm.pgt, start, end - start, - &host_s2_mem, pkvm_hyp_id); + &host_s2_pool, pkvm_hyp_id); hyp_spin_unlock(&host_kvm.lock); return ret != -EAGAIN ? 
ret : 0; diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index a3d3a275344e..1cff3259a493 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -24,8 +24,7 @@ unsigned long hyp_nr_cpus; static void *vmemmap_base; static void *hyp_pgt_base; -static void *host_s2_mem_pgt_base; -static void *host_s2_dev_pgt_base; +static void *host_s2_pgt_base; static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; static int divide_memory_pool(void *virt, unsigned long size) @@ -45,14 +44,9 @@ static int divide_memory_pool(void *virt, unsigned long size) if (!hyp_pgt_base) return -ENOMEM; - nr_pages = host_s2_mem_pgtable_pages(); - host_s2_mem_pgt_base = hyp_early_alloc_contig(nr_pages); - if (!host_s2_mem_pgt_base) - return -ENOMEM; - - nr_pages = host_s2_dev_pgtable_pages(); - host_s2_dev_pgt_base = hyp_early_alloc_contig(nr_pages); - if (!host_s2_dev_pgt_base) + nr_pages = host_s2_pgtable_pages(); + host_s2_pgt_base = hyp_early_alloc_contig(nr_pages); + if (!host_s2_pgt_base) return -ENOMEM; return 0; @@ -158,7 +152,7 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; - ret = kvm_host_prepare_stage2(host_s2_mem_pgt_base, host_s2_dev_pgt_base); + ret = kvm_host_prepare_stage2(host_s2_pgt_base); if (ret) goto out; diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c index 83ca23ac259b..d654921dd09b 100644 --- a/arch/arm64/kvm/hyp/reserved_mem.c +++ b/arch/arm64/kvm/hyp/reserved_mem.c @@ -71,8 +71,7 @@ void __init kvm_hyp_reserve(void) } hyp_mem_pages += hyp_s1_pgtable_pages(); - hyp_mem_pages += host_s2_mem_pgtable_pages(); - hyp_mem_pages += host_s2_dev_pgtable_pages(); + hyp_mem_pages += host_s2_pgtable_pages(); /* * The hyp_vmemmap needs to be backed by pages, but these pages -- cgit From d978b9cfe6fe8008467f8c5d51677f52e7815b39 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:16 +0000 Subject: KVM: arm64: Remove hyp_pool pointer from struct hyp_page Each struct hyp_page currently contains a pointer to a hyp_pool struct where the page should be freed if its refcount reaches 0. However, this information can always be inferred from the context in the EL2 code, so drop the pointer to save a few bytes in the vmemmap. 
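For illustration, the change boils down to the following (the size figures assume 64-bit pointers and natural alignment, so they are estimates rather than guarantees):

    /* Before: 4 + 4 + 8 = 16 bytes of vmemmap per page. */
    struct hyp_page {
        unsigned int refcount;
        unsigned int order;
        struct hyp_pool *pool;    /* redundant: the caller knows the pool */
    };

    /* After: 8 bytes per page; callers name the pool explicitly. */
    hyp_put_page(&host_s2_pool, addr);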
Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-6-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/gfp.h | 4 ++-- arch/arm64/kvm/hyp/include/nvhe/memory.h | 2 -- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 14 ++++++++++++-- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 7 ++----- arch/arm64/kvm/hyp/nvhe/setup.c | 14 ++++++++++++-- 5 files changed, 28 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index f2c84e4fa40f..3ea7bfb6c380 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -24,8 +24,8 @@ struct hyp_pool { /* Allocation */ void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order); -void hyp_get_page(void *addr); -void hyp_put_page(void *addr); +void hyp_get_page(struct hyp_pool *pool, void *addr); +void hyp_put_page(struct hyp_pool *pool, void *addr); /* Used pages cannot be freed */ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 7691ab495eb4..991636be2f46 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -7,11 +7,9 @@ #include -struct hyp_pool; struct hyp_page { unsigned int refcount; unsigned int order; - struct hyp_pool *pool; }; extern u64 __hyp_vmemmap; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index c8ed7e86231b..d938ce95d3bd 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -43,6 +43,16 @@ static void *host_s2_zalloc_page(void *pool) return hyp_alloc_pages(pool, 0); } +static void host_s2_get_page(void *addr) +{ + hyp_get_page(&host_s2_pool, addr); +} + +static void host_s2_put_page(void *addr) +{ + hyp_put_page(&host_s2_pool, addr); +} + static int prepare_s2_pool(void *pgt_pool_base) { unsigned long nr_pages, pfn; @@ -60,8 +70,8 @@ static int prepare_s2_pool(void *pgt_pool_base) .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, .page_count = hyp_page_count, - .get_page = hyp_get_page, - .put_page = hyp_put_page, + .get_page = host_s2_get_page, + .put_page = host_s2_put_page, }; return 0; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 34f0eb026dd2..e3689def7033 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -174,20 +174,18 @@ static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p) * section to guarantee transient states (e.g. a page with null refcount but * not yet attached to a free list) can't be observed by well-behaved readers. 
*/ -void hyp_put_page(void *addr) +void hyp_put_page(struct hyp_pool *pool, void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); - struct hyp_pool *pool = hyp_page_to_pool(p); hyp_spin_lock(&pool->lock); __hyp_put_page(pool, p); hyp_spin_unlock(&pool->lock); } -void hyp_get_page(void *addr) +void hyp_get_page(struct hyp_pool *pool, void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); - struct hyp_pool *pool = hyp_page_to_pool(p); hyp_spin_lock(&pool->lock); hyp_page_ref_inc(p); @@ -236,7 +234,6 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, /* Init the vmemmap portion */ p = hyp_phys_to_page(phys); for (i = 0; i < nr_pages; i++) { - p[i].pool = pool; p[i].order = 0; hyp_set_page_refcounted(&p[i]); } diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 1cff3259a493..f834833ac921 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -137,6 +137,16 @@ static void *hyp_zalloc_hyp_page(void *arg) return hyp_alloc_pages(&hpool, 0); } +static void hpool_get_page(void *addr) +{ + hyp_get_page(&hpool, addr); +} + +static void hpool_put_page(void *addr) +{ + hyp_put_page(&hpool, addr); +} + void __noreturn __pkvm_init_finalise(void) { struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); @@ -160,8 +170,8 @@ void __noreturn __pkvm_init_finalise(void) .zalloc_page = hyp_zalloc_hyp_page, .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, - .get_page = hyp_get_page, - .put_page = hyp_put_page, + .get_page = hpool_get_page, + .put_page = hpool_put_page, }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; -- cgit From 87ec0606733e1aa9568f54ddb41f03aa6b5687f2 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:17 +0000 Subject: KVM: arm64: Use less bits for hyp_page order The hyp_page order is currently encoded on 4 bytes even though it is guaranteed to be smaller than this. Make it 2 bytes to reduce the hyp vmemmap overhead. 
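A 16-bit order is comfortably wide enough: valid orders are bounded by max_order, which is itself derived from MAX_ORDER, a small constant. A hypothetical compile-time guard (not part of the patch) would be:

    /* HYP_NO_ORDER doubles as the sentinel, so valid orders must stay below it. */
    static_assert(MAX_ORDER < USHRT_MAX, "hyp_page order must fit in 16 bits");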
Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-7-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/gfp.h | 6 +++--- arch/arm64/kvm/hyp/include/nvhe/memory.h | 2 +- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index 3ea7bfb6c380..fb0f523d1492 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -7,7 +7,7 @@ #include #include -#define HYP_NO_ORDER UINT_MAX +#define HYP_NO_ORDER USHRT_MAX struct hyp_pool { /* @@ -19,11 +19,11 @@ struct hyp_pool { struct list_head free_area[MAX_ORDER]; phys_addr_t range_start; phys_addr_t range_end; - unsigned int max_order; + unsigned short max_order; }; /* Allocation */ -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order); +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order); void hyp_get_page(struct hyp_pool *pool, void *addr); void hyp_put_page(struct hyp_pool *pool, void *addr); diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 991636be2f46..3fe34fa30ea4 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -9,7 +9,7 @@ struct hyp_page { unsigned int refcount; - unsigned int order; + unsigned short order; }; extern u64 __hyp_vmemmap; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index e3689def7033..be07055bbc10 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -32,7 +32,7 @@ u64 __hyp_vmemmap; */ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { phys_addr_t addr = hyp_page_to_phys(p); @@ -51,7 +51,7 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, /* Find a buddy page currently available for allocation */ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order); @@ -93,7 +93,7 @@ static inline struct hyp_page *node_to_page(struct list_head *node) static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { - unsigned int order = p->order; + unsigned short order = p->order; struct hyp_page *buddy; memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); @@ -123,7 +123,7 @@ static void __hyp_attach_page(struct hyp_pool *pool, static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, struct hyp_page *p, - unsigned int order) + unsigned short order) { struct hyp_page *buddy; @@ -192,9 +192,9 @@ void hyp_get_page(struct hyp_pool *pool, void *addr) hyp_spin_unlock(&pool->lock); } -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned int order) +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order) { - unsigned int i = order; + unsigned short i = order; struct hyp_page *p; hyp_spin_lock(&pool->lock); -- cgit From 6929586d8eddad184f43526efe7bf0a8be4f18b2 Mon Sep 17 00:00:00 2001 From: Quentin Perret Date: Tue, 8 Jun 2021 11:45:18 +0000 Subject: KVM: arm64: Use less bits for hyp_page refcount The hyp_page refcount is currently encoded on 4 bytes even though we never need to count that many objects in a page. Make it 2 bytes to save some space in the vmemmap. 
Overflows also become more likely, so make sure to catch them with a BUG() in the increment function. Signed-off-by: Quentin Perret Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210608114518.748712-8-qperret@google.com --- arch/arm64/kvm/hyp/include/nvhe/memory.h | 2 +- arch/arm64/kvm/hyp/nvhe/page_alloc.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 3fe34fa30ea4..592b7edb3edb 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -8,7 +8,7 @@ #include struct hyp_page { - unsigned int refcount; + unsigned short refcount; unsigned short order; }; diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index be07055bbc10..41fc25bdfb34 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -146,6 +146,7 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, static inline void hyp_page_ref_inc(struct hyp_page *p) { + BUG_ON(p->refcount == USHRT_MAX); p->refcount++; } -- cgit From 6204004de3160900435bdb4b9a2fb8749a9277d2 Mon Sep 17 00:00:00 2001 From: Yanan Wang Date: Thu, 17 Jun 2021 18:58:21 +0800 Subject: KVM: arm64: Introduce two cache maintenance callbacks To prepare for performing CMOs for guest stage-2 in the fault handlers in pgtable.c, introduce two cache maintenance callbacks in struct kvm_pgtable_mm_ops. We also adjust the alignment of the existing comment block, with no change to its content. Reviewed-by: Fuad Tabba Signed-off-by: Yanan Wang [maz: fixed up comments and renamed callbacks] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210617105824.31752-2-wangyanan55@huawei.com --- arch/arm64/include/asm/kvm_pgtable.h | 42 +++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index c3674c47d48c..f004c0115d89 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -27,23 +27,29 @@ typedef u64 kvm_pte_t; /** * struct kvm_pgtable_mm_ops - Memory management callbacks. - * @zalloc_page: Allocate a single zeroed memory page. The @arg parameter - * can be used by the walker to pass a memcache. The - * initial refcount of the page is 1. - * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. The - * @size parameter is in bytes, and is rounded-up to the - * next page boundary. The resulting allocation is - * physically contiguous. - * @free_pages_exact: Free an exact number of memory pages previously - * allocated by zalloc_pages_exact. - * @get_page: Increment the refcount on a page. - * @put_page: Decrement the refcount on a page. When the refcount - * reaches 0 the page is automatically freed. - * @page_count: Return the refcount of a page. - * @phys_to_virt: Convert a physical address into a virtual address mapped - * in the current context. - * @virt_to_phys: Convert a virtual address mapped in the current context - * into a physical address. + * @zalloc_page: Allocate a single zeroed memory page. + * The @arg parameter can be used by the walker + * to pass a memcache. The initial refcount of + * the page is 1. + * @zalloc_pages_exact: Allocate an exact number of zeroed memory pages. + * The @size parameter is in bytes, and is rounded + * up to the next page boundary.
The resulting + * allocation is physically contiguous. + * @free_pages_exact: Free an exact number of memory pages previously + * allocated by zalloc_pages_exact. + * @get_page: Increment the refcount on a page. + * @put_page: Decrement the refcount on a page. When the + * refcount reaches 0 the page is automatically + * freed. + * @page_count: Return the refcount of a page. + * @phys_to_virt: Convert a physical address into a virtual + * address mapped in the current context. + * @virt_to_phys: Convert a virtual address mapped in the current + * context into a physical address. + * @dcache_clean_inval_poc: Clean and invalidate the data cache to the PoC + * for the specified memory address range. + * @icache_inval_pou: Invalidate the instruction cache to the PoU + * for the specified memory address range. */ struct kvm_pgtable_mm_ops { void* (*zalloc_page)(void *arg); @@ -54,6 +60,8 @@ struct kvm_pgtable_mm_ops { int (*page_count)(void *addr); void* (*phys_to_virt)(phys_addr_t phys); phys_addr_t (*virt_to_phys)(void *addr); + void (*dcache_clean_inval_poc)(void *addr, size_t size); + void (*icache_inval_pou)(void *addr, size_t size); }; /** -- cgit From a4d5ca5c7cd8fe85056b8cb838fbcb7e5a05f356 Mon Sep 17 00:00:00 2001 From: Yanan Wang Date: Thu, 17 Jun 2021 18:58:22 +0800 Subject: KVM: arm64: Introduce mm_ops member for structure stage2_attr_data Also add a mm_ops member for structure stage2_attr_data, since we will move I-cache maintenance for guest stage-2 to the permission path and as a result will need mm_ops for some callbacks. Reviewed-by: Fuad Tabba Signed-off-by: Yanan Wang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210617105824.31752-3-wangyanan55@huawei.com --- arch/arm64/kvm/hyp/pgtable.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index c37c1dc4feaf..d99789432b05 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -861,10 +861,11 @@ int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) } struct stage2_attr_data { - kvm_pte_t attr_set; - kvm_pte_t attr_clr; - kvm_pte_t pte; - u32 level; + kvm_pte_t attr_set; + kvm_pte_t attr_clr; + kvm_pte_t pte; + u32 level; + struct kvm_pgtable_mm_ops *mm_ops; }; static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, @@ -903,6 +904,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, struct stage2_attr_data data = { .attr_set = attr_set & attr_mask, .attr_clr = attr_clr & attr_mask, + .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker walker = { .cb = stage2_attr_walker, -- cgit From 378e6a9c78a02b4b609846aa0afccf34d3038977 Mon Sep 17 00:00:00 2001 From: Yanan Wang Date: Thu, 17 Jun 2021 18:58:23 +0800 Subject: KVM: arm64: Tweak parameters of guest cache maintenance functions Adjust the parameter "kvm_pfn_t pfn" of __clean_dcache_guest_page and __invalidate_icache_guest_page to "void *va", which paves the way for converting these two guest CMO functions into callbacks in structure kvm_pgtable_mm_ops. No functional change. 
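The conversion simply hoists the pfn-to-VA translation out of the helpers and up to the call sites, along these lines (a sketch of the pattern the diff below applies):

    void *va = page_address(pfn_to_page(pfn));

    __clean_dcache_guest_page(va, size);
    __invalidate_icache_guest_page(va, size);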
Reviewed-by: Fuad Tabba Signed-off-by: Yanan Wang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210617105824.31752-4-wangyanan55@huawei.com --- arch/arm64/include/asm/kvm_mmu.h | 9 ++------- arch/arm64/kvm/mmu.c | 28 +++++++++++++++------------- 2 files changed, 17 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 25ed956f9af1..6844a7550392 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -187,10 +187,8 @@ static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101; } -static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) +static inline void __clean_dcache_guest_page(void *va, size_t size) { - void *va = page_address(pfn_to_page(pfn)); - /* * With FWB, we ensure that the guest always accesses memory using * cacheable attributes, and we don't have to clean to PoC when @@ -203,16 +201,13 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) kvm_flush_dcache_to_poc(va, size); } -static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn, - unsigned long size) +static inline void __invalidate_icache_guest_page(void *va, size_t size) { if (icache_is_aliasing()) { /* any kind of VIPT cache */ __flush_icache_all(); } else if (is_kernel_in_hyp_mode() || !icache_is_vpipt()) { /* PIPT or VPIPT at EL2 (see comment in __kvm_tlb_flush_vmid_ipa) */ - void *va = page_address(pfn_to_page(pfn)); - invalidate_icache_range((unsigned long)va, (unsigned long)va + size); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c10207fed2f3..0a5a5b098a4a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -126,6 +126,16 @@ static void *kvm_host_va(phys_addr_t phys) return __va(phys); } +static void clean_dcache_guest_page(void *va, size_t size) +{ + __clean_dcache_guest_page(va, size); +} + +static void invalidate_icache_guest_page(void *va, size_t size) +{ + __invalidate_icache_guest_page(va, size); +} + /* * Unmapping vs dcache management: * @@ -693,16 +703,6 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); } -static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) -{ - __clean_dcache_guest_page(pfn, size); -} - -static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) -{ - __invalidate_icache_guest_page(pfn, size); -} - static void kvm_send_hwpoison_signal(unsigned long address, short lsb) { send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current); @@ -972,11 +972,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, prot |= KVM_PGTABLE_PROT_W; if (fault_status != FSC_PERM && !device) - clean_dcache_guest_page(pfn, vma_pagesize); + clean_dcache_guest_page(page_address(pfn_to_page(pfn)), + vma_pagesize); if (exec_fault) { prot |= KVM_PGTABLE_PROT_X; - invalidate_icache_guest_page(pfn, vma_pagesize); + invalidate_icache_guest_page(page_address(pfn_to_page(pfn)), + vma_pagesize); } if (device) @@ -1178,7 +1180,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) * We've moved a page around, probably through CoW, so let's treat it * just like a translation fault and clean the cache to the PoC. 
*/ - clean_dcache_guest_page(pfn, PAGE_SIZE); + clean_dcache_guest_page(page_address(pfn_to_page(pfn)), PAGE_SIZE); /* * The MMU notifiers will have unmapped a huge PMD before calling -- cgit From 25aa28691bb960a76f0cffd8862144a29487f6ff Mon Sep 17 00:00:00 2001 From: Yanan Wang Date: Thu, 17 Jun 2021 18:58:24 +0800 Subject: KVM: arm64: Move guest CMOs to the fault handlers We currently perform CMOs of the D-cache and I-cache uniformly in user_mem_abort() before calling the fault handlers. If we get concurrent guest faults (e.g. translation faults, permission faults) or some genuinely unnecessary guest faults caused by BBM, the CMOs for the first vCPU are necessary while those for the later ones are not. By moving the CMOs to the fault handlers, we can easily identify the conditions where they are really needed and avoid the unnecessary ones. Performing CMOs is a time consuming process, especially when flushing a block range, so this change takes significant load off KVM and improves the efficiency of the stage-2 page table code. Two specific scenarios gain much benefit: 1) During normal VM startup, this change improves the efficiency of handling the guest page faults incurred by vCPUs when initially populating stage-2 page tables. 2) After live migration, the heavy workload is resumed on the destination VM while all the stage-2 page tables still need to be rebuilt, so this change eases the performance drop during the resume stage. Reviewed-by: Fuad Tabba Signed-off-by: Yanan Wang Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210617105824.31752-5-wangyanan55@huawei.com --- arch/arm64/kvm/hyp/pgtable.c | 38 +++++++++++++++++++++++++++++++------- arch/arm64/kvm/mmu.c | 21 +++++++-------------- 2 files changed, 38 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index d99789432b05..72f1d8f50094 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -577,12 +577,24 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr, mm_ops->put_page(ptep); } +static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) +{ + u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; + return memattr == KVM_S2_MEMATTR(pgt, NORMAL); +} + +static bool stage2_pte_executable(kvm_pte_t pte) +{ + return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); +} + static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct stage2_map_data *data) { kvm_pte_t new, old = *ptep; u64 granule = kvm_granule_size(level), phys = data->phys; + struct kvm_pgtable *pgt = data->mmu->pgt; struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_block_mapping_supported(addr, end, phys, level)) @@ -606,6 +618,14 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); } + /* Perform CMOs before installation of the guest stage-2 PTE */ + if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new)) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops), + granule); + + if (mm_ops->icache_inval_pou && stage2_pte_executable(new)) + mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); + smp_store_release(ptep, new); if (stage2_pte_is_counted(new)) mm_ops->get_page(ptep); @@ -798,12 +818,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } -static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) -{ - u64
memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; - return memattr == KVM_S2_MEMATTR(pgt, NORMAL); -} - static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) @@ -874,6 +888,7 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, { kvm_pte_t pte = *ptep; struct stage2_attr_data *data = arg; + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; if (!kvm_pte_valid(pte)) return 0; @@ -888,8 +903,17 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * but worst-case the access flag update gets lost and will be * set on the next access instead. */ - if (data->pte != pte) + if (data->pte != pte) { + /* + * Invalidate instruction cache before updating the guest + * stage-2 PTE if we are going to add executable permission. + */ + if (mm_ops->icache_inval_pou && + stage2_pte_executable(pte) && !stage2_pte_executable(*ptep)) + mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); WRITE_ONCE(*ptep, pte); + } return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 0a5a5b098a4a..0b3ba57849f6 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -442,6 +442,8 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { .page_count = kvm_host_page_count, .phys_to_virt = kvm_host_va, .virt_to_phys = kvm_host_pa, + .dcache_clean_inval_poc = clean_dcache_guest_page, + .icache_inval_pou = invalidate_icache_guest_page, }; /** @@ -971,15 +973,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (writable) prot |= KVM_PGTABLE_PROT_W; - if (fault_status != FSC_PERM && !device) - clean_dcache_guest_page(page_address(pfn_to_page(pfn)), - vma_pagesize); - - if (exec_fault) { + if (exec_fault) prot |= KVM_PGTABLE_PROT_X; - invalidate_icache_guest_page(page_address(pfn_to_page(pfn)), - vma_pagesize); - } if (device) prot |= KVM_PGTABLE_PROT_DEVICE; @@ -1177,12 +1172,10 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) WARN_ON(range->end - range->start != 1); /* - * We've moved a page around, probably through CoW, so let's treat it - * just like a translation fault and clean the cache to the PoC. - */ - clean_dcache_guest_page(page_address(pfn_to_page(pfn)), PAGE_SIZE); - - /* + * We've moved a page around, probably through CoW, so let's treat + * it just like a translation fault and the map handler will clean + * the cache to the PoC. + * * The MMU notifiers will have unmapped a huge PMD before calling * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and * therefore we never need to clear out a huge PMD through this -- cgit From 2a71fabf6a1bc9162a84e18d6ab991230ca4d588 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Fri, 18 Jun 2021 11:51:39 +0100 Subject: KVM: arm64: Don't zero the cycle count register when PMCR_EL0.P is set According to ARM DDI 0487G.a, page D13-3895, setting the PMCR_EL0.P bit to 1 has the following effect: "Reset all event counters accessible in the current Exception level, not including PMCCNTR_EL0, to zero." Similar behaviour is described for AArch32 on page G8-7022. Make it so. 
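Since the cycle counter sits at bit 31 of the counter mask (ARMV8_PMU_CYCLE_IDX), the fix amounts to clearing that bit before walking the bitmap, roughly (a trimmed sketch of kvm_pmu_handle_pmcr()):

    if (val & ARMV8_PMU_PMCR_P) {
        /* PMCR_EL0.P resets event counters only, never PMCCNTR_EL0 */
        mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
        for_each_set_bit(i, &mask, 32)
            kvm_pmu_set_counter_value(vcpu, i, 0);
    }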
Fixes: c01d6a18023b ("KVM: arm64: pmu: Only handle supported event counters") Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210618105139.83795-1-alexandru.elisei@arm.com --- arch/arm64/kvm/pmu-emul.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index fd167d4f4215..ecc0d19c8cc1 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -578,6 +578,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); if (val & ARMV8_PMU_PMCR_P) { + mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) kvm_pmu_set_counter_value(vcpu, i, 0); } -- cgit From d0c94c49792cf780cbfefe29f81bb8c3b73bc76b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 3 Jun 2021 16:50:02 +0100 Subject: KVM: arm64: Restore PMU configuration on first run Restoring a guest with an active virtual PMU results in no perf counters being instantiated on the host side. Not quite what you'd expect from a restore. In order to fix this, force a writeback of PMCR_EL0 on the first run of a vcpu (using a new request so that it happens once the vcpu has been loaded). This will in turn create all the host-side counters that were missing. Reported-by: Jinank Jain Tested-by: Jinank Jain Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/87wnrbylxv.wl-maz@kernel.org Link: https://lore.kernel.org/r/b53dfcf9bbc4db7f96154b1cd5188d72b9766358.camel@amazon.de --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 4 ++++ arch/arm64/kvm/pmu-emul.c | 3 +++ 3 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7cd7d5c8c4bc..6336b4309114 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,7 @@ #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2) #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) +#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5) #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e720148232a0..facf4d41d32a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -689,6 +689,10 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) vgic_v4_load(vcpu); preempt_enable(); } + + if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu)) + kvm_pmu_handle_pmcr(vcpu, + __vcpu_sys_reg(vcpu, PMCR_EL0)); } } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index ecc0d19c8cc1..f33825c995cb 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -851,6 +851,9 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) return -EINVAL; } + /* One-off reload of the PMU on first run */ + kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu); + return 0; } -- cgit From 69e3b846d8a753f9f279f29531ca56b0f7563ad0 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:11 +0100 Subject: arm64: mte: Sync tags for pages where PTE is untagged A KVM guest could store tags in a page even if the VMM hasn't mapped the page with PROT_MTE. So when restoring pages from swap we will need to check to see if there are any saved tags even if !pte_tagged(). However don't check pages for which pte_access_permitted() returns false as these will not have been swapped out.
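The new trigger condition in set_pte_at() can be condensed as follows (mte_needs_sync() is a made-up name; the real hunk below open-codes is_swap_pte() for the old PTE):

    static bool mte_needs_sync(pte_t old_pte, pte_t pte)
    {
        /* Exec-only mappings don't expose tags; special PTEs are skipped. */
        if (!pte_access_permitted(pte, false) || pte_special(pte))
            return false;

        /* Sync if tags are enabled, or if tags may be swapped back in. */
        return pte_tagged(pte) || is_swap_pte(old_pte);
    }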
Reviewed-by: Catalin Marinas Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-2-steven.price@arm.com --- arch/arm64/include/asm/mte.h | 4 ++-- arch/arm64/include/asm/pgtable.h | 22 +++++++++++++++++++--- arch/arm64/kernel/mte.c | 18 +++++++++++++----- 3 files changed, 34 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index bc88a1ced0d7..347ef38a35f7 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -37,7 +37,7 @@ void mte_free_tag_storage(char *storage); /* track which pages have valid allocation tags */ #define PG_mte_tagged PG_arch_2 -void mte_sync_tags(pte_t *ptep, pte_t pte); +void mte_sync_tags(pte_t old_pte, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); void mte_thread_init_user(void); void mte_thread_switch(struct task_struct *next); @@ -53,7 +53,7 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request, /* unused if !CONFIG_ARM64_MTE, silence the compiler */ #define PG_mte_tagged 0 -static inline void mte_sync_tags(pte_t *ptep, pte_t pte) +static inline void mte_sync_tags(pte_t old_pte, pte_t pte) { } static inline void mte_copy_page_tags(void *kto, const void *kfrom) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 0b10204e72fc..db5402168841 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -314,9 +314,25 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) __sync_icache_dcache(pte); - if (system_supports_mte() && - pte_present(pte) && pte_tagged(pte) && !pte_special(pte)) - mte_sync_tags(ptep, pte); + /* + * If the PTE would provide user space access to the tags associated + * with it then ensure that the MTE tags are synchronised. Although + * pte_access_permitted() returns false for exec only mappings, they + * don't expose tags (instruction fetches don't check tags). + */ + if (system_supports_mte() && pte_access_permitted(pte, false) && + !pte_special(pte)) { + pte_t old_pte = READ_ONCE(*ptep); + /* + * We only need to synchronise if the new PTE has tags enabled + * or if swapping in (in which case another mapping may have + * set tags in the past even if this PTE isn't tagged). 
+ * (!pte_none() && !pte_present()) is an open coded version of + * is_swap_pte() + */ + if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte))) + mte_sync_tags(old_pte, pte); + } __check_racy_pte_update(mm, ptep, pte); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 125a10e413e9..69b3fde8759e 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -32,10 +32,9 @@ DEFINE_STATIC_KEY_FALSE(mte_async_mode); EXPORT_SYMBOL_GPL(mte_async_mode); #endif -static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) +static void mte_sync_page_tags(struct page *page, pte_t old_pte, + bool check_swap, bool pte_is_tagged) { - pte_t old_pte = READ_ONCE(*ptep); - if (check_swap && is_swap_pte(old_pte)) { swp_entry_t entry = pte_to_swp_entry(old_pte); @@ -43,6 +42,9 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) return; } + if (!pte_is_tagged) + return; + page_kasan_tag_reset(page); /* * We need smp_wmb() in between setting the flags and clearing the @@ -55,16 +57,22 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap) mte_clear_page_tags(page_address(page)); } -void mte_sync_tags(pte_t *ptep, pte_t pte) +void mte_sync_tags(pte_t old_pte, pte_t pte) { struct page *page = pte_page(pte); long i, nr_pages = compound_nr(page); bool check_swap = nr_pages == 1; + bool pte_is_tagged = pte_tagged(pte); + + /* Early out if there's nothing to do */ + if (!check_swap && !pte_is_tagged) + return; /* if PG_mte_tagged is set, tags have already been initialised */ for (i = 0; i < nr_pages; i++, page++) { if (!test_and_set_bit(PG_mte_tagged, &page->flags)) - mte_sync_page_tags(page, ptep, check_swap); + mte_sync_page_tags(page, old_pte, check_swap, + pte_is_tagged); } } -- cgit From ea7fc1bb1cd1b92b42b1d9273ce7e231d3dc9321 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:12 +0100 Subject: KVM: arm64: Introduce MTE VM feature Add a new VM feature 'KVM_ARM_CAP_MTE' which enables memory tagging for a VM. This will expose the feature to the guest and automatically tag memory pages touched by the VM as PG_mte_tagged (and clear the tag storage) to ensure that the guest cannot see stale tags, and so that the tags are correctly saved/restored across swap. Actually exposing the new capability to user space happens in a later patch. 
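Once the capability is exposed (a later patch in this series), a VMM would be expected to opt in before creating memslots, along these lines (illustrative userspace snippet, not part of the patch):

    #include <sys/ioctl.h>
    #include <stdio.h>
    #include <linux/kvm.h>

    struct kvm_enable_cap cap = {
        .cap = KVM_CAP_ARM_MTE,
    };

    /* vm_fd comes from a prior KVM_CREATE_VM ioctl. */
    if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
        perror("KVM_ENABLE_CAP(KVM_CAP_ARM_MTE)");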
Reviewed-by: Catalin Marinas Signed-off-by: Steven Price [maz: move VM_SHARED sampling into the critical section] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-3-steven.price@arm.com --- arch/arm64/include/asm/kvm_emulate.h | 3 ++ arch/arm64/include/asm/kvm_host.h | 4 +++ arch/arm64/kvm/hyp/exception.c | 3 +- arch/arm64/kvm/mmu.c | 67 +++++++++++++++++++++++++++++++++++- arch/arm64/kvm/sys_regs.c | 7 ++++ include/uapi/linux/kvm.h | 1 + 6 files changed, 83 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 01b9857757f2..fd418955e31e 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -84,6 +84,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) || vcpu_el1_is_32bit(vcpu)) vcpu->arch.hcr_el2 |= HCR_TID2; + + if (kvm_has_mte(vcpu->kvm)) + vcpu->arch.hcr_el2 |= HCR_ATA; } static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7cd7d5c8c4bc..1c4293c46ef6 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -132,6 +132,9 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; + + /* Memory Tagging Extension enabled for the guest */ + bool mte_enabled; }; struct kvm_vcpu_fault_info { @@ -769,6 +772,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) +#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled) #define kvm_vcpu_has_pmu(vcpu) \ (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 11541b94b328..0418399e0a20 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, new |= (old & PSR_C_BIT); new |= (old & PSR_V_BIT); - // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + if (kvm_has_mte(vcpu->kvm)) + new |= PSR_TCO_BIT; new |= (old & PSR_DIT_BIT); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index c10207fed2f3..c6a97d463892 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -822,6 +822,45 @@ transparent_hugepage_adjust(struct kvm_memory_slot *memslot, return PAGE_SIZE; } +/* + * The page will be mapped in stage 2 as Normal Cacheable, so the VM will be + * able to see the page's tags and therefore they must be initialised first. If + * PG_mte_tagged is set, tags have already been initialised. + * + * The race in the test/set of the PG_mte_tagged flag is handled by: + * - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs + * racing to sanitise the same page + * - mmap_lock protects between a VM faulting a page in and the VMM performing + * an mprotect() to add VM_MTE + */ +static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, + unsigned long size) +{ + unsigned long i, nr_pages = size >> PAGE_SHIFT; + struct page *page; + + if (!kvm_has_mte(kvm)) + return 0; + + /* + * pfn_to_online_page() is used to reject ZONE_DEVICE pages + * that may not support tags.
+ */ + page = pfn_to_online_page(pfn); + + if (!page) + return -EFAULT; + + for (i = 0; i < nr_pages; i++, page++) { + if (!test_bit(PG_mte_tagged, &page->flags)) { + mte_clear_page_tags(page_address(page)); + set_bit(PG_mte_tagged, &page->flags); + } + } + + return 0; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -830,6 +869,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, bool write_fault, writable, force_pte = false; bool exec_fault; bool device = false; + bool shared; unsigned long mmu_seq; struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; @@ -873,6 +913,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = PAGE_SHIFT; } + shared = (vma->vm_flags & VM_PFNMAP); + switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SHIFT: @@ -971,8 +1013,18 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (writable) prot |= KVM_PGTABLE_PROT_W; - if (fault_status != FSC_PERM && !device) + if (fault_status != FSC_PERM && !device) { + /* Check the VMM hasn't introduced a new VM_SHARED VMA */ + if (kvm_has_mte(kvm) && shared) { + ret = -EFAULT; + goto out_unlock; + } + ret = sanitise_mte_tags(kvm, pfn, vma_pagesize); + if (ret) + goto out_unlock; + clean_dcache_guest_page(pfn, vma_pagesize); + } if (exec_fault) { prot |= KVM_PGTABLE_PROT_X; @@ -1168,12 +1220,17 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_pfn_t pfn = pte_pfn(range->pte); + int ret; if (!kvm->arch.mmu.pgt) return false; WARN_ON(range->end - range->start != 1); + ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE); + if (ret) + return false; + /* * We've moved a page around, probably through CoW, so let's treat it * just like a translation fault and clean the cache to the PoC. @@ -1381,6 +1438,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (!vma) break; + /* + * VM_SHARED mappings are not allowed with MTE to avoid races + * when updating the PG_mte_tagged page flag, see + * sanitise_mte_tags for more details. 
+ */ + if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) + return -EINVAL; + /* * Take the intersection of this VMA with the memory region */ diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1a7968ad078c..36f67f8deae1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1047,6 +1047,13 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, break; case SYS_ID_AA64PFR1_EL1: val &= ~FEATURE(ID_AA64PFR1_MTE); + if (kvm_has_mte(vcpu->kvm)) { + u64 pfr, mte; + + pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); + mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT); + val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte); + } break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 79d9c44d1ad7..d4da58ddcad7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1083,6 +1083,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SGX_ATTRIBUTE 196 #define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 #define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_ARM_MTE 199 #ifdef KVM_CAP_IRQ_ROUTING -- cgit From e1f358b5046479d2897f23b1d5b092687c6e7a67 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 21 Jun 2021 12:17:13 +0100 Subject: KVM: arm64: Save/restore MTE registers Define the new system registers that MTE introduces and context switch them. The MTE feature is still hidden from the ID register as it isn't supported in a VM yet. Reviewed-by: Catalin Marinas Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210621111716.37157-4-steven.price@arm.com --- arch/arm64/include/asm/kvm_arm.h | 3 +- arch/arm64/include/asm/kvm_host.h | 6 +++ arch/arm64/include/asm/kvm_mte.h | 66 ++++++++++++++++++++++++++++++ arch/arm64/include/asm/sysreg.h | 3 +- arch/arm64/kernel/asm-offsets.c | 2 + arch/arm64/kvm/hyp/entry.S | 7 ++++ arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 21 ++++++++++ arch/arm64/kvm/sys_regs.c | 22 ++++++++-- 8 files changed, 124 insertions(+), 6 deletions(-) create mode 100644 arch/arm64/include/asm/kvm_mte.h (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 692c9049befa..d436831dd706 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -12,7 +12,8 @@ #include /* Hyp Configuration Register (HCR) bits */ -#define HCR_ATA (UL(1) << 56) +#define HCR_ATA_SHIFT 56 +#define HCR_ATA (UL(1) << HCR_ATA_SHIFT) #define HCR_FWB (UL(1) << 46) #define HCR_API (UL(1) << 41) #define HCR_APK (UL(1) << 40) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 1c4293c46ef6..74a7447a83a1 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -209,6 +209,12 @@ enum vcpu_sysreg { CNTP_CVAL_EL0, CNTP_CTL_EL0, + /* Memory Tagging Extension registers */ + RGSR_EL1, /* Random Allocation Tag Seed Register */ + GCR_EL1, /* Tag Control Register */ + TFSR_EL1, /* Tag Fault Status Register (EL1) */ + TFSRE0_EL1, /* Tag Fault Status Register (EL0) */ + /* 32bit specific registers. 
diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
new file mode 100644
index 000000000000..de002636eb1f
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mte.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2021 ARM Ltd.
+ */
+#ifndef __ASM_KVM_MTE_H
+#define __ASM_KVM_MTE_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_MTE
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+	b	.L__skip_switch\@
+alternative_else_nop_endif
+	mrs	\reg1, hcr_el2
+	tbz	\reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+	mrs_s	\reg1, SYS_RGSR_EL1
+	str	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
+	mrs_s	\reg1, SYS_GCR_EL1
+	str	\reg1, [\h_ctxt, #CPU_GCR_EL1]
+
+	ldr	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
+	msr_s	SYS_RGSR_EL1, \reg1
+	ldr	\reg1, [\g_ctxt, #CPU_GCR_EL1]
+	msr_s	SYS_GCR_EL1, \reg1
+
+.L__skip_switch\@:
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+	b	.L__skip_switch\@
+alternative_else_nop_endif
+	mrs	\reg1, hcr_el2
+	tbz	\reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
+
+	mrs_s	\reg1, SYS_RGSR_EL1
+	str	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
+	mrs_s	\reg1, SYS_GCR_EL1
+	str	\reg1, [\g_ctxt, #CPU_GCR_EL1]
+
+	ldr	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
+	msr_s	SYS_RGSR_EL1, \reg1
+	ldr	\reg1, [\h_ctxt, #CPU_GCR_EL1]
+	msr_s	SYS_GCR_EL1, \reg1
+
+	isb
+
+.L__skip_switch\@:
+.endm
+
+#else /* !CONFIG_ARM64_MTE */
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+.endm
+
+#endif /* CONFIG_ARM64_MTE */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_KVM_MTE_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 65d15700a168..347ccac2341e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -651,7 +651,8 @@
 
 #define INIT_SCTLR_EL2_MMU_ON						\
 	(SCTLR_ELx_M  | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I |	\
-	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
+	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 |		\
+	 SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
 
 #define INIT_SCTLR_EL2_MMU_OFF \
 	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0cb34ccb6e73..6f0044cb233e 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -111,6 +111,8 @@ int main(void)
   DEFINE(VCPU_WORKAROUND_FLAGS,	offsetof(struct kvm_vcpu, arch.workaround_flags));
   DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
   DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_cpu_context, regs));
+  DEFINE(CPU_RGSR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
+  DEFINE(CPU_GCR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
   DEFINE(CPU_APIAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
   DEFINE(CPU_APIBKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
   DEFINE(CPU_APDAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e831d3dfd50d..435346ea1504 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -13,6 +13,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_mte.h>
 #include <asm/kvm_ptrauth.h>
 
 .text
@@ -51,6 +52,9 @@ alternative_else_nop_endif
 
 	add	x29, x0, #VCPU_CONTEXT
 
+	// mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
+	mte_switch_to_guest x29, x1, x2
+
 	// Macro ptrauth_switch_to_guest format:
 	// 	ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
 	// The below macro to restore guest keys is not implemented in C code
@@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
 	// when this feature is enabled for kernel code.
 	ptrauth_switch_to_hyp x1, x2, x3, x4, x5
 
+	// mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
+	mte_switch_to_hyp x1, x2, x3
+
 	// Restore hyp's sp_el0
 	restore_sp_el0 x2, x3
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index cce43bfe158f..de7e14c862e6 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -14,6 +14,7 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mte.h>
 
 static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 {
@@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
 	ctxt_sys_reg(ctxt, TPIDRRO_EL0)	= read_sysreg(tpidrro_el0);
 }
 
+static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+{
+	struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
+
+	if (!vcpu)
+		vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+
+	return kvm_has_mte(kern_hyp_va(vcpu->kvm));
+}
+
 static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
 	ctxt_sys_reg(ctxt, CSSELR_EL1)	= read_sysreg(csselr_el1);
@@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 	ctxt_sys_reg(ctxt, PAR_EL1)	= read_sysreg_par();
 	ctxt_sys_reg(ctxt, TPIDR_EL1)	= read_sysreg(tpidr_el1);
 
+	if (ctxt_has_mte(ctxt)) {
+		ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
+		ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
+	}
+
 	ctxt_sys_reg(ctxt, SP_EL1)	= read_sysreg(sp_el1);
 	ctxt_sys_reg(ctxt, ELR_EL1)	= read_sysreg_el1(SYS_ELR);
 	ctxt_sys_reg(ctxt, SPSR_EL1)	= read_sysreg_el1(SYS_SPSR);
@@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 	write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1),	par_el1);
 	write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1),	tpidr_el1);
 
+	if (ctxt_has_mte(ctxt)) {
+		write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
+		write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
+	}
+
 	if (!has_vhe() &&
 	    cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
 	    ctxt->__hyp_running_vcpu) {
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 36f67f8deae1..5c75b24eae21 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1309,6 +1309,20 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
+				   const struct sys_reg_desc *rd)
+{
+	return REG_HIDDEN;
+}
+
+#define MTE_REG(name) {				\
+	SYS_DESC(SYS_##name),			\
+	.access = undef_access,			\
+	.reset = reset_unknown,			\
+	.reg = name,				\
+	.visibility = mte_visibility,		\
+}
+
 /* sys_reg_desc initialiser for known cpufeature ID registers */
 #define ID_SANITISED(name) {			\
 	SYS_DESC(SYS_##name),			\
@@ -1477,8 +1491,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
 	{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
 
-	{ SYS_DESC(SYS_RGSR_EL1), undef_access },
-	{ SYS_DESC(SYS_GCR_EL1), undef_access },
+	MTE_REG(RGSR_EL1),
+	MTE_REG(GCR_EL1),
 
 	{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
 	{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
@@ -1505,8 +1519,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
 	{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
 
-	{ SYS_DESC(SYS_TFSR_EL1), undef_access },
-	{ SYS_DESC(SYS_TFSRE0_EL1), undef_access },
+	MTE_REG(TFSR_EL1),
+	MTE_REG(TFSRE0_EL1),
 
 	{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
 	{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
-- cgit
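To see why these registers matter to a guest, consider the hedged guest-side sketch below: IRG derives a random logical tag using RGSR_EL1 as seed state and the GCR_EL1.Exclude mask, so tag generation only behaves consistently across vcpu preemption once the hypervisor context switches these registers as above. Illustrative only; it assumes the guest kernel has already enabled MTE (SCTLR_EL1.ATA plus a TCF mode) and mapped the memory as tagged:

/* Tag one 16-byte MTE granule and return the matching tagged pointer. */
static inline void *tag_granule(void *ptr)
{
	void *tagged;

	asm volatile("irg %0, %1\n\t"	/* insert random tag (uses RGSR_EL1/GCR_EL1) */
		     "stg %0, [%0]"	/* store the allocation tag to memory */
		     : "=r" (tagged)
		     : "r" (ptr)
		     : "memory");
	return tagged;
}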
From 673638f434ee4a00319e254ade338c57618d6f7e Mon Sep 17 00:00:00 2001
From: Steven Price
Date: Mon, 21 Jun 2021 12:17:14 +0100
Subject: KVM: arm64: Expose KVM_ARM_CAP_MTE

It's now safe for the VMM to enable MTE in a guest, so expose the
capability to user space.

Reviewed-by: Catalin Marinas
Signed-off-by: Steven Price
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210621111716.37157-5-steven.price@arm.com
---
 arch/arm64/kvm/arm.c      | 9 +++++++++
 arch/arm64/kvm/reset.c    | 4 ++++
 arch/arm64/kvm/sys_regs.c | 3 +++
 3 files changed, 16 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e720148232a0..28ce26a68f09 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -93,6 +93,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		r = 0;
 		kvm->arch.return_nisv_io_abort_to_user = true;
 		break;
+	case KVM_CAP_ARM_MTE:
+		if (!system_supports_mte() || kvm->created_vcpus)
+			return -EINVAL;
+		r = 0;
+		kvm->arch.mte_enabled = true;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -237,6 +243,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		 */
 		r = 1;
 		break;
+	case KVM_CAP_ARM_MTE:
+		r = system_supports_mte();
+		break;
 	case KVM_CAP_STEAL_TIME:
 		r = kvm_arm_pvtime_supported();
 		break;
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index d37ebee085cf..cba7872d69a8 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -176,6 +176,10 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
 	if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
 		return false;
 
+	/* MTE is incompatible with AArch32 */
+	if (kvm_has_mte(vcpu->kvm) && is32bit)
+		return false;
+
 	/* Check that the vcpus are either all 32bit or all 64bit */
 	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
 		if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 5c75b24eae21..f6f126eb6ac1 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1312,6 +1312,9 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
 				   const struct sys_reg_desc *rd)
 {
+	if (kvm_has_mte(vcpu->kvm))
+		return 0;
+
 	return REG_HIDDEN;
 }
 
-- cgit
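For reference, a VMM is expected to enable the capability per-VM, after KVM_CREATE_VM but before any vcpus exist (the kvm->created_vcpus check above enforces the ordering). A hedged sketch of that call sequence; the helper name is invented:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* vm_fd comes from KVM_CREATE_VM; call before the first KVM_CREATE_VCPU. */
static int vm_enable_mte(int vm_fd)
{
	struct kvm_enable_cap cap = { .cap = KVM_CAP_ARM_MTE };

	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_MTE) <= 0)
		return -1;	/* host kernel or CPU lacks MTE support */

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}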
From f0376edb1ddcab19a473b4bf1fbd5b6bbed3705b Mon Sep 17 00:00:00 2001
From: Steven Price
Date: Mon, 21 Jun 2021 12:17:15 +0100
Subject: KVM: arm64: Add ioctl to fetch/store tags in a guest

The VMM may not wish to have its own mapping of guest memory mapped
with PROT_MTE because this causes problems if the VMM has tag checking
enabled (the guest controls the tags in physical RAM and it's unlikely
the tags are correct for the VMM).

Instead add a new ioctl which allows the VMM to easily read/write the
tags from guest memory, allowing the VMM's mapping to be non-PROT_MTE
while the VMM can still read/write the tags for the purpose of
migration.

Reviewed-by: Catalin Marinas
Signed-off-by: Steven Price
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20210621111716.37157-6-steven.price@arm.com
---
 arch/arm64/include/asm/kvm_host.h |  3 ++
 arch/arm64/include/asm/mte-def.h  |  1 +
 arch/arm64/include/uapi/asm/kvm.h | 11 ++++++
 arch/arm64/kvm/arm.c              |  7 ++++
 arch/arm64/kvm/guest.c            | 82 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h          |  1 +
 6 files changed, 105 insertions(+)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 74a7447a83a1..c93a7198c242 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -730,6 +730,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
 
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+				struct kvm_arm_copy_mte_tags *copy_tags);
+
 /* Guest/host FPSIMD coordination helpers */
 int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
 void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
index cf241b0f0a42..626d359b396e 100644
--- a/arch/arm64/include/asm/mte-def.h
+++ b/arch/arm64/include/asm/mte-def.h
@@ -7,6 +7,7 @@
 
 #define MTE_GRANULE_SIZE	UL(16)
 #define MTE_GRANULE_MASK	(~(MTE_GRANULE_SIZE - 1))
+#define MTE_GRANULES_PER_PAGE	(PAGE_SIZE / MTE_GRANULE_SIZE)
 #define MTE_TAG_SHIFT		56
 #define MTE_TAG_SIZE		4
 #define MTE_TAG_MASK		GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 24223adae150..b3edde68bc3e 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -184,6 +184,17 @@ struct kvm_vcpu_events {
 	__u32 reserved[12];
 };
 
+struct kvm_arm_copy_mte_tags {
+	__u64 guest_ipa;
+	__u64 length;
+	void __user *addr;
+	__u64 flags;
+	__u64 reserved[2];
+};
+
+#define KVM_ARM_TAGS_TO_GUEST		0
+#define KVM_ARM_TAGS_FROM_GUEST		1
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK		0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT	16
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 28ce26a68f09..511f3716fe33 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1359,6 +1359,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 		return 0;
 	}
+	case KVM_ARM_MTE_COPY_TAGS: {
+		struct kvm_arm_copy_mte_tags copy_tags;
+
+		if (copy_from_user(&copy_tags, argp, sizeof(copy_tags)))
+			return -EFAULT;
+		return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
+	}
 	default:
 		return -EINVAL;
 	}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5cb4a1cd5603..4ddb20017b2f 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -995,3 +995,85 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 
 	return ret;
 }
+
+long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
+				struct kvm_arm_copy_mte_tags *copy_tags)
+{
+	gpa_t guest_ipa = copy_tags->guest_ipa;
+	size_t length = copy_tags->length;
+	void __user *tags = copy_tags->addr;
+	gpa_t gfn;
+	bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST);
+	int ret = 0;
+
+	if (!kvm_has_mte(kvm))
+		return -EINVAL;
+
+	if (copy_tags->reserved[0] || copy_tags->reserved[1])
+		return -EINVAL;
+
+	if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST)
+		return -EINVAL;
+
+	if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
+		return -EINVAL;
+
+	gfn = gpa_to_gfn(guest_ipa);
+
+	mutex_lock(&kvm->slots_lock);
+
+	while (length > 0) {
+		kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
+		void *maddr;
+		unsigned long num_tags;
+		struct page *page;
+
+		if (is_error_noslot_pfn(pfn)) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		page = pfn_to_online_page(pfn);
+		if (!page) {
+			/* Reject ZONE_DEVICE memory */
+			ret = -EFAULT;
+			goto out;
+		}
+		maddr = page_address(page);
+
+		if (!write) {
+			if (test_bit(PG_mte_tagged, &page->flags))
+				num_tags = mte_copy_tags_to_user(tags, maddr,
+							MTE_GRANULES_PER_PAGE);
+			else
+				/* No tags in memory, so write zeros */
+				num_tags = MTE_GRANULES_PER_PAGE -
+					clear_user(tags, MTE_GRANULES_PER_PAGE);
+			kvm_release_pfn_clean(pfn);
+		} else {
+			num_tags = mte_copy_tags_from_user(maddr, tags,
+							MTE_GRANULES_PER_PAGE);
+			kvm_release_pfn_dirty(pfn);
+		}
+
+		if (num_tags != MTE_GRANULES_PER_PAGE) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		/* Set the flag after checking the write completed fully */
+		if (write)
+			set_bit(PG_mte_tagged, &page->flags);
+
+		gfn++;
+		tags += num_tags;
+		length -= PAGE_SIZE;
+	}
+
+out:
+	mutex_unlock(&kvm->slots_lock);
+	/* If some data has been copied report the number of bytes copied */
+	if (length != copy_tags->length)
+		return copy_tags->length - length;
+	return ret;
+}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d4da58ddcad7..da1edd2b4046 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1429,6 +1429,7 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_PMU_EVENT_FILTER */
 #define KVM_SET_PMU_EVENT_FILTER  _IOW(KVMIO,  0xb2, struct kvm_pmu_event_filter)
 #define KVM_PPC_SVM_OFF		  _IO(KVMIO,  0xb3)
+#define KVM_ARM_MTE_COPY_TAGS	  _IOR(KVMIO,  0xb4, struct kvm_arm_copy_mte_tags)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
-- cgit
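A hedged sketch of the migration-side usage this enables: the VMM keeps its mapping of guest RAM non-PROT_MTE and pulls the tags out of band, one tag byte per 16-byte granule (so 256 tag bytes per 4KiB page). The 4KiB page size and the helper name are assumptions of this example:

#include <linux/kvm.h>
#include <stdint.h>
#include <sys/ioctl.h>

#define PAGE_SZ		4096
#define TAGS_PER_PAGE	(PAGE_SZ / 16)	/* one tag byte per MTE granule */

/* Read the tags for one page of guest IPA space into buf[TAGS_PER_PAGE]. */
static long fetch_page_tags(int vm_fd, uint64_t ipa, uint8_t *buf)
{
	struct kvm_arm_copy_mte_tags copy = {
		.guest_ipa = ipa,	/* must be page aligned */
		.length = PAGE_SZ,	/* must be a multiple of the page size */
		.addr = buf,
		.flags = KVM_ARM_TAGS_FROM_GUEST,
	};

	/* Returns bytes processed on success; -1 with errno set on failure. */
	return ioctl(vm_fd, KVM_ARM_MTE_COPY_TAGS, &copy);
}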
From 98db7259fa7b963d80da49fd636744e28a78981e Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 24 Jun 2021 14:21:05 +0100
Subject: KVM: arm64: Set the MTE tag bit before releasing the page

Setting a page flag without holding a reference to the page
is living dangerously. In the tag-writing path, we drop the
reference to the page by calling kvm_release_pfn_dirty(),
and only then set the PG_mte_tagged bit.

It would be safer to do it the other way round.

Fixes: f0376edb1ddca ("KVM: arm64: Add ioctl to fetch/store tags in a guest")
Cc: Catalin Marinas
Reviewed-by: Steven Price
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/87k0mjidwb.wl-maz@kernel.org
---
 arch/arm64/kvm/guest.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 4ddb20017b2f..60815ae477cf 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -1053,6 +1053,14 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
 		} else {
 			num_tags = mte_copy_tags_from_user(maddr, tags,
 							MTE_GRANULES_PER_PAGE);
+
+			/*
+			 * Set the flag after checking the write
+			 * completed fully
+			 */
+			if (num_tags == MTE_GRANULES_PER_PAGE)
+				set_bit(PG_mte_tagged, &page->flags);
+
 			kvm_release_pfn_dirty(pfn);
 		}
 
@@ -1061,10 +1069,6 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
 			goto out;
 		}
 
-		/* Set the flag after checking the write completed fully */
-		if (write)
-			set_bit(PG_mte_tagged, &page->flags);
-
 		gfn++;
 		tags += num_tags;
 		length -= PAGE_SIZE;
-- cgit