From c55191e96caa9d787e8f682c5e525b7f8172a3b4 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 7 Nov 2018 11:36:20 +0100
Subject: arm64: mm: apply r/o permissions of VM areas to its linear alias as
 well

On arm64, we use block mappings and contiguous hints to map the linear
region, to minimize the TLB footprint. However, this means that the
entire region is mapped using read/write permissions, which we cannot
modify at page granularity without having to take intrusive measures to
prevent TLB conflicts.

This means the linear aliases of pages belonging to read-only mappings
(executable or otherwise) in the vmalloc region are also mapped read/write,
and could potentially be abused to modify things like module code, bpf JIT
code or other read-only data.

So let's fix this, by extending the set_memory_ro/rw routines to take
the linear alias into account. The consequence of enabling this is
that we can no longer use block mappings or contiguous hints, so in
cases where the TLB footprint of the linear region is a bottleneck,
performance may be affected.

Therefore, allow this feature to be runtime en/disabled, by setting
rodata=full (or 'on' to disable just this enhancement, or 'off' to
disable read-only mappings for code and r/o data entirely) on the
kernel command line. Also, allow the default value to be set via a
Kconfig option.

Tested-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/mmu_context.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 1e58bf58c22b..dfcfeffd2080 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -35,6 +35,8 @@
 #include <asm/sysreg.h>
 #include <asm/tlbflush.h>
 
+extern bool rodata_full;
+
 static inline void contextidr_thread_switch(struct task_struct *next)
 {
 	if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR))
-- 
cgit 


From c8ebf64eab743130fe404dc6679c2ff0cbc01615 Mon Sep 17 00:00:00 2001
From: Jessica Yu <jeyu@kernel.org>
Date: Mon, 5 Nov 2018 19:53:23 +0100
Subject: arm64/module: use plt section indices for relocations

Instead of saving a pointer to the .plt and .init.plt sections to apply
plt-based relocations, save and use their section indices instead.

The mod->arch.{core,init}.plt pointers were problematic for livepatch
because they pointed within temporary section headers (provided by the
module loader via info->sechdrs) that would be freed after module load.
Since livepatch modules may need to apply relocations post-module-load
(for example, to patch a module that is loaded later), using section
indices to offset into the section headers (instead of accessing them
through a saved pointer) allows livepatch modules on arm64 to pass in
their own copy of the section headers to apply_relocate_add() to apply
delayed relocations.

Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Miroslav Benes <mbenes@suse.cz>
Signed-off-by: Jessica Yu <jeyu@kernel.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/module.h |  8 +++++---
 arch/arm64/kernel/module-plts.c | 36 ++++++++++++++++++++----------------
 arch/arm64/kernel/module.c      |  9 +++++----
 3 files changed, 30 insertions(+), 23 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 97d0ef12e2ff..f81c1847312d 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -22,7 +22,7 @@
 
 #ifdef CONFIG_ARM64_MODULE_PLTS
 struct mod_plt_sec {
-	struct elf64_shdr	*plt;
+	int			plt_shndx;
 	int			plt_num_entries;
 	int			plt_max_entries;
 };
@@ -36,10 +36,12 @@ struct mod_arch_specific {
 };
 #endif
 
-u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
+u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
+			  void *loc, const Elf64_Rela *rela,
 			  Elf64_Sym *sym);
 
-u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val);
+u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
+				void *loc, u64 val);
 
 #ifdef CONFIG_RANDOMIZE_BASE
 extern u64 module_alloc_base;
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index f0690c2ca3e0..a0efe30211d6 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -16,12 +16,13 @@ static bool in_init(const struct module *mod, void *loc)
 	return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
 }
 
-u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
+u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
+			  void *loc, const Elf64_Rela *rela,
 			  Elf64_Sym *sym)
 {
 	struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
 							  &mod->arch.init;
-	struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
+	struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
 	int i = pltsec->plt_num_entries;
 	u64 val = sym->st_value + rela->r_addend;
 
@@ -43,11 +44,12 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
 }
 
 #ifdef CONFIG_ARM64_ERRATUM_843419
-u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val)
+u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
+				void *loc, u64 val)
 {
 	struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
 							  &mod->arch.init;
-	struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
+	struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
 	int i = pltsec->plt_num_entries++;
 	u32 mov0, mov1, mov2, br;
 	int rd;
@@ -202,7 +204,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	unsigned long core_plts = 0;
 	unsigned long init_plts = 0;
 	Elf64_Sym *syms = NULL;
-	Elf_Shdr *tramp = NULL;
+	Elf_Shdr *pltsec, *tramp = NULL;
 	int i;
 
 	/*
@@ -211,9 +213,9 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	 */
 	for (i = 0; i < ehdr->e_shnum; i++) {
 		if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
-			mod->arch.core.plt = sechdrs + i;
+			mod->arch.core.plt_shndx = i;
 		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
-			mod->arch.init.plt = sechdrs + i;
+			mod->arch.init.plt_shndx = i;
 		else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
 			 !strcmp(secstrings + sechdrs[i].sh_name,
 				 ".text.ftrace_trampoline"))
@@ -222,7 +224,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			syms = (Elf64_Sym *)sechdrs[i].sh_addr;
 	}
 
-	if (!mod->arch.core.plt || !mod->arch.init.plt) {
+	if (!mod->arch.core.plt_shndx || !mod->arch.init.plt_shndx) {
 		pr_err("%s: module PLT section(s) missing\n", mod->name);
 		return -ENOEXEC;
 	}
@@ -254,17 +256,19 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 						sechdrs[i].sh_info, dstsec);
 	}
 
-	mod->arch.core.plt->sh_type = SHT_NOBITS;
-	mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
-	mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.core.plt->sh_size = (core_plts  + 1) * sizeof(struct plt_entry);
+	pltsec = sechdrs + mod->arch.core.plt_shndx;
+	pltsec->sh_type = SHT_NOBITS;
+	pltsec->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	pltsec->sh_addralign = L1_CACHE_BYTES;
+	pltsec->sh_size = (core_plts  + 1) * sizeof(struct plt_entry);
 	mod->arch.core.plt_num_entries = 0;
 	mod->arch.core.plt_max_entries = core_plts;
 
-	mod->arch.init.plt->sh_type = SHT_NOBITS;
-	mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
-	mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.init.plt->sh_size = (init_plts + 1) * sizeof(struct plt_entry);
+	pltsec = sechdrs + mod->arch.init.plt_shndx;
+	pltsec->sh_type = SHT_NOBITS;
+	pltsec->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	pltsec->sh_addralign = L1_CACHE_BYTES;
+	pltsec->sh_size = (init_plts + 1) * sizeof(struct plt_entry);
 	mod->arch.init.plt_num_entries = 0;
 	mod->arch.init.plt_max_entries = init_plts;
 
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index f0f27aeefb73..c2abe59c6f8f 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -198,7 +198,8 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
 	return 0;
 }
 
-static int reloc_insn_adrp(struct module *mod, __le32 *place, u64 val)
+static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
+			   __le32 *place, u64 val)
 {
 	u32 insn;
 
@@ -215,7 +216,7 @@ static int reloc_insn_adrp(struct module *mod, __le32 *place, u64 val)
 		insn &= ~BIT(31);
 	} else {
 		/* out of range for ADR -> emit a veneer */
-		val = module_emit_veneer_for_adrp(mod, place, val & ~0xfff);
+		val = module_emit_veneer_for_adrp(mod, sechdrs, place, val & ~0xfff);
 		if (!val)
 			return -ENOEXEC;
 		insn = aarch64_insn_gen_branch_imm((u64)place, val,
@@ -368,7 +369,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		case R_AARCH64_ADR_PREL_PG_HI21_NC:
 			overflow_check = false;
 		case R_AARCH64_ADR_PREL_PG_HI21:
-			ovf = reloc_insn_adrp(me, loc, val);
+			ovf = reloc_insn_adrp(me, sechdrs, loc, val);
 			if (ovf && ovf != -ERANGE)
 				return ovf;
 			break;
@@ -413,7 +414,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 
 			if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
 			    ovf == -ERANGE) {
-				val = module_emit_plt_entry(me, loc, &rel[i], sym);
+				val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
 				if (!val)
 					return -ENOEXEC;
 				ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
-- 
cgit 


From 4b47e573a4a4f462b619fe623f6dab5c723c2f27 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 5 Oct 2018 13:31:10 +0100
Subject: arm64: perf: Move event definitions into perf_event.h

The PMU event numbers are split between perf_event.h and perf_event.c,
which makes it difficult to spot any gaps in the numbers which may be
allocated in the future.

This patch sorts the events numerically, adds some missing events and
moves the definitions into perf_event.h.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/perf_event.h | 164 +++++++++++++++++++++++++++++++++---
 arch/arm64/kernel/perf_event.c      | 145 +------------------------------
 2 files changed, 155 insertions(+), 154 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index f9ccc36d3dc3..c63e5e4fdccd 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -23,6 +23,160 @@
 #define	ARMV8_PMU_MAX_COUNTERS	32
 #define	ARMV8_PMU_COUNTER_MASK	(ARMV8_PMU_MAX_COUNTERS - 1)
 
+/*
+ * Common architectural and microarchitectural event numbers.
+ */
+#define ARMV8_PMUV3_PERFCTR_SW_INCR				0x00
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL			0x01
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL			0x02
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL			0x03
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE				0x04
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL			0x05
+#define ARMV8_PMUV3_PERFCTR_LD_RETIRED				0x06
+#define ARMV8_PMUV3_PERFCTR_ST_RETIRED				0x07
+#define ARMV8_PMUV3_PERFCTR_INST_RETIRED			0x08
+#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN				0x09
+#define ARMV8_PMUV3_PERFCTR_EXC_RETURN				0x0A
+#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED			0x0B
+#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED			0x0C
+#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED			0x0D
+#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED			0x0E
+#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED		0x0F
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED				0x10
+#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES				0x11
+#define ARMV8_PMUV3_PERFCTR_BR_PRED				0x12
+#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS				0x13
+#define ARMV8_PMUV3_PERFCTR_L1I_CACHE				0x14
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB			0x15
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE				0x16
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL			0x17
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB			0x18
+#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS				0x19
+#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR			0x1A
+#define ARMV8_PMUV3_PERFCTR_INST_SPEC				0x1B
+#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED			0x1C
+#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES				0x1D
+#define ARMV8_PMUV3_PERFCTR_CHAIN				0x1E
+#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE			0x1F
+#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE			0x20
+#define ARMV8_PMUV3_PERFCTR_BR_RETIRED				0x21
+#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED			0x22
+#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND			0x23
+#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND			0x24
+#define ARMV8_PMUV3_PERFCTR_L1D_TLB				0x25
+#define ARMV8_PMUV3_PERFCTR_L1I_TLB				0x26
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE				0x27
+#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL			0x28
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE			0x29
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL			0x2A
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE				0x2B
+#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB			0x2C
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL			0x2D
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL			0x2E
+#define ARMV8_PMUV3_PERFCTR_L2D_TLB				0x2F
+#define ARMV8_PMUV3_PERFCTR_L2I_TLB				0x30
+#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS			0x31
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE				0x32
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS			0x33
+#define ARMV8_PMUV3_PERFCTR_DTLB_WALK				0x34
+#define ARMV8_PMUV3_PERFCTR_ITLB_WALK				0x35
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD				0x36
+#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD			0x37
+#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD			0x38
+
+/* Statistical profiling extension microarchitectural events */
+#define	ARMV8_SPE_PERFCTR_SAMPLE_POP				0x4000
+#define	ARMV8_SPE_PERFCTR_SAMPLE_FEED				0x4001
+#define	ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE			0x4002
+#define	ARMV8_SPE_PERFCTR_SAMPLE_COLLISION			0x4003
+
+/* ARMv8 recommended implementation defined event types */
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD			0x40
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR			0x41
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD		0x42
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR		0x43
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER		0x44
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER		0x45
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM		0x46
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN			0x47
+#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL			0x48
+
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD			0x4C
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR			0x4D
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD				0x4E
+#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR				0x4F
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD			0x50
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR			0x51
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD		0x52
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR		0x53
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM		0x56
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN			0x57
+#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL			0x58
+
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD			0x5C
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR			0x5D
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD				0x5E
+#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR				0x5F
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD			0x60
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR			0x61
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED			0x62
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED		0x63
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL			0x64
+#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH			0x65
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD			0x66
+#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR			0x67
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC			0x68
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC			0x69
+#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC		0x6A
+
+#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC				0x6C
+#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC			0x6D
+#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC			0x6E
+#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC				0x6F
+#define ARMV8_IMPDEF_PERFCTR_LD_SPEC				0x70
+#define ARMV8_IMPDEF_PERFCTR_ST_SPEC				0x71
+#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC				0x72
+#define ARMV8_IMPDEF_PERFCTR_DP_SPEC				0x73
+#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC				0x74
+#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC				0x75
+#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC			0x76
+#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC			0x77
+#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC			0x78
+#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC			0x79
+#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC			0x7A
+
+#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC				0x7C
+#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC				0x7D
+#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC				0x7E
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF				0x81
+#define ARMV8_IMPDEF_PERFCTR_EXC_SVC				0x82
+#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT				0x83
+#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT				0x84
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ				0x86
+#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ				0x87
+#define ARMV8_IMPDEF_PERFCTR_EXC_SMC				0x88
+
+#define ARMV8_IMPDEF_PERFCTR_EXC_HVC				0x8A
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT			0x8B
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT			0x8C
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER			0x8D
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ			0x8E
+#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ			0x8F
+#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC				0x90
+#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC				0x91
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD			0xA0
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR			0xA1
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD		0xA2
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR		0xA3
+
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM		0xA6
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN			0xA7
+#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL			0xA8
+
 /*
  * Per-CPU PMCR: config reg
  */
@@ -49,16 +203,6 @@
 #define	ARMV8_PMU_EVTYPE_MASK	0xc800ffff	/* Mask for writable bits */
 #define	ARMV8_PMU_EVTYPE_EVENT	0xffff		/* Mask for EVENT bits */
 
-/*
- * PMUv3 event types: required events
- */
-#define ARMV8_PMUV3_PERFCTR_SW_INCR				0x00
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL			0x03
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE				0x04
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED				0x10
-#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES				0x11
-#define ARMV8_PMUV3_PERFCTR_BR_PRED				0x12
-
 /*
  * Event filters for PMUv3
  */
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 3005a29472e2..a6e17622bad3 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1,5 +1,5 @@
 /*
- * PMU support
+ * ARMv8 PMUv3 Performance Events handling code.
  *
  * Copyright (C) 2012 ARM Limited
  * Author: Will Deacon <will.deacon@arm.com>
@@ -30,149 +30,6 @@
 #include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
 
-/*
- * ARMv8 PMUv3 Performance Events handling code.
- * Common event types (some are defined in asm/perf_event.h).
- */
-
-/* At least one of the following is required. */
-#define ARMV8_PMUV3_PERFCTR_INST_RETIRED			0x08
-#define ARMV8_PMUV3_PERFCTR_INST_SPEC				0x1B
-
-/* Common architectural events. */
-#define ARMV8_PMUV3_PERFCTR_LD_RETIRED				0x06
-#define ARMV8_PMUV3_PERFCTR_ST_RETIRED				0x07
-#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN				0x09
-#define ARMV8_PMUV3_PERFCTR_EXC_RETURN				0x0A
-#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED			0x0B
-#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED			0x0C
-#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED			0x0D
-#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED			0x0E
-#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED		0x0F
-#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED			0x1C
-#define ARMV8_PMUV3_PERFCTR_CHAIN				0x1E
-#define ARMV8_PMUV3_PERFCTR_BR_RETIRED				0x21
-
-/* Common microarchitectural events. */
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL			0x01
-#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL			0x02
-#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL			0x05
-#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS				0x13
-#define ARMV8_PMUV3_PERFCTR_L1I_CACHE				0x14
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB			0x15
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE				0x16
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL			0x17
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB			0x18
-#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS				0x19
-#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR			0x1A
-#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES				0x1D
-#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE			0x1F
-#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE			0x20
-#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED			0x22
-#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND			0x23
-#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND			0x24
-#define ARMV8_PMUV3_PERFCTR_L1D_TLB				0x25
-#define ARMV8_PMUV3_PERFCTR_L1I_TLB				0x26
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE				0x27
-#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL			0x28
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE			0x29
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL			0x2A
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE				0x2B
-#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB			0x2C
-#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL			0x2D
-#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL			0x2E
-#define ARMV8_PMUV3_PERFCTR_L2D_TLB				0x2F
-#define ARMV8_PMUV3_PERFCTR_L2I_TLB				0x30
-
-/* ARMv8 recommended implementation defined event types */
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD			0x40
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR			0x41
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD		0x42
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR		0x43
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER		0x44
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER		0x45
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM		0x46
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN			0x47
-#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL			0x48
-
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD			0x4C
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR			0x4D
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD				0x4E
-#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR				0x4F
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD			0x50
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR			0x51
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD		0x52
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR		0x53
-
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM		0x56
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN			0x57
-#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL			0x58
-
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD			0x5C
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR			0x5D
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD				0x5E
-#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR				0x5F
-
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD			0x60
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR			0x61
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED			0x62
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED		0x63
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL			0x64
-#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH			0x65
-
-#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD			0x66
-#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR			0x67
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC			0x68
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC			0x69
-#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC		0x6A
-
-#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC				0x6C
-#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC			0x6D
-#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC			0x6E
-#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC				0x6F
-#define ARMV8_IMPDEF_PERFCTR_LD_SPEC				0x70
-#define ARMV8_IMPDEF_PERFCTR_ST_SPEC				0x71
-#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC				0x72
-#define ARMV8_IMPDEF_PERFCTR_DP_SPEC				0x73
-#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC				0x74
-#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC				0x75
-#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC			0x76
-#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC			0x77
-#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC			0x78
-#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC			0x79
-#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC			0x7A
-
-#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC				0x7C
-#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC				0x7D
-#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC				0x7E
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF				0x81
-#define ARMV8_IMPDEF_PERFCTR_EXC_SVC				0x82
-#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT				0x83
-#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT				0x84
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ				0x86
-#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ				0x87
-#define ARMV8_IMPDEF_PERFCTR_EXC_SMC				0x88
-
-#define ARMV8_IMPDEF_PERFCTR_EXC_HVC				0x8A
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT			0x8B
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT			0x8C
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER			0x8D
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ			0x8E
-#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ			0x8F
-#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC				0x90
-#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC				0x91
-
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD			0xA0
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR			0xA1
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD		0xA2
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR		0xA3
-
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM		0xA6
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN			0xA7
-#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL			0xA8
-
 /* ARMv8 Cortex-A53 specific event types. */
 #define ARMV8_A53_PERFCTR_PREF_LINEFILL				0xC2
 
-- 
cgit 


From 3403e56b41c176f6531a2a6d77d85b46fa34169c Mon Sep 17 00:00:00 2001
From: Alex Van Brunt <avanbrunt@nvidia.com>
Date: Mon, 29 Oct 2018 14:55:58 +0530
Subject: arm64: mm: Don't wait for completion of TLB invalidation when page
 aging

When transitioning a PTE from young to old as part of page aging, we
can avoid waiting for the TLB invalidation to complete and therefore
drop the subsequent DSB instruction. Whilst this opens up a race with
page reclaim, where a PTE in active use via a stale, young TLB entry
does not update the underlying descriptor, the worst thing that happens
is that the page is reclaimed and then immediately faulted back in.

Given that we have a DSB in our context-switch path, the window for a
spurious reclaim is fairly limited and eliding the barrier claims to
boost NVMe/SSD accesses by over 10% on some platforms.

A similar optimisation was made for x86 in commit b13b1d2d8692 ("x86/mm:
In the PTE swapout page reclaim case clear the accessed bit instead of
flushing the TLB").

Signed-off-by: Alex Van Brunt <avanbrunt@nvidia.com>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
[will: rewrote patch]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/pgtable.h  | 22 ++++++++++++++++++++++
 arch/arm64/include/asm/tlbflush.h | 11 +++++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 50b1ef8584c0..5bbb59c81920 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -22,6 +22,7 @@
 #include <asm/memory.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable-prot.h>
+#include <asm/tlbflush.h>
 
 /*
  * VMALLOC range.
@@ -685,6 +686,27 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 	return __ptep_test_and_clear_young(ptep);
 }
 
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+					 unsigned long address, pte_t *ptep)
+{
+	int young = ptep_test_and_clear_young(vma, address, ptep);
+
+	if (young) {
+		/*
+		 * We can elide the trailing DSB here since the worst that can
+		 * happen is that a CPU continues to use the young entry in its
+		 * TLB and we mistakenly reclaim the associated page. The
+		 * window for such an event is bounded by the next
+		 * context-switch, which provides a DSB to complete the TLB
+		 * invalidation.
+		 */
+		flush_tlb_page_nosync(vma, address);
+	}
+
+	return young;
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index c3c0387aee18..a629a4067aae 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -21,6 +21,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/mm_types.h>
 #include <linux/sched.h>
 #include <asm/cputype.h>
 #include <asm/mmu.h>
@@ -164,14 +165,20 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 	dsb(ish);
 }
 
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-				  unsigned long uaddr)
+static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
+					 unsigned long uaddr)
 {
 	unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
 
 	dsb(ishst);
 	__tlbi(vale1is, addr);
 	__tlbi_user(vale1is, addr);
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long uaddr)
+{
+	flush_tlb_page_nosync(vma, uaddr);
 	dsb(ish);
 }
 
-- 
cgit 


From 6460d32014717686d3b7963595950ba2c6d1bb5e Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 7 Nov 2018 23:06:15 +0000
Subject: arm64: io: Ensure calls to delay routines are ordered against prior
 readX()

A relatively standard idiom for ensuring that a pair of MMIO writes to a
device arrive at that device with a specified minimum delay between them
is as follows:

	writel_relaxed(42, dev_base + CTL1);
	readl(dev_base + CTL1);
	udelay(10);
	writel_relaxed(42, dev_base + CTL2);

the intention being that the read-back from the device will push the
prior write to CTL1, and the udelay will hold up the write to CTL1 until
at least 10us have elapsed.

Unfortunately, on arm64 where the underlying delay loop is implemented
as a read of the architected counter, the CPU does not guarantee
ordering from the readl() to the delay loop and therefore the delay loop
could in theory be speculated and not provide the desired interval
between the two writes.

Fix this in a similar manner to PowerPC by introducing a dummy control
dependency on the output of readX() which, combined with the ISB in the
read of the architected counter, guarantees that a subsequent delay loop
can not be executed until the readX() has returned its result.

Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/io.h | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 9f8b915af3a7..d42d00d8d5b6 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -104,7 +104,22 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
 }
 
 /* IO barriers */
-#define __iormb()		rmb()
+#define __iormb(v)							\
+({									\
+	unsigned long tmp;						\
+									\
+	rmb();								\
+									\
+	/*								\
+	 * Create a dummy control dependency from the IO read to any	\
+	 * later instructions. This ensures that a subsequent call to	\
+	 * udelay() will be ordered due to the ISB in get_cycles().	\
+	 */								\
+	asm volatile("eor	%0, %1, %1\n"				\
+		     "cbnz	%0, ."					\
+		     : "=r" (tmp) : "r" (v) : "memory");		\
+})
+
 #define __iowmb()		wmb()
 
 #define mmiowb()		do { } while (0)
@@ -129,10 +144,10 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
  * following Normal memory access. Writes are ordered relative to any prior
  * Normal memory access.
  */
-#define readb(c)		({ u8  __v = readb_relaxed(c); __iormb(); __v; })
-#define readw(c)		({ u16 __v = readw_relaxed(c); __iormb(); __v; })
-#define readl(c)		({ u32 __v = readl_relaxed(c); __iormb(); __v; })
-#define readq(c)		({ u64 __v = readq_relaxed(c); __iormb(); __v; })
+#define readb(c)		({ u8  __v = readb_relaxed(c); __iormb(__v); __v; })
+#define readw(c)		({ u16 __v = readw_relaxed(c); __iormb(__v); __v; })
+#define readl(c)		({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+#define readq(c)		({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
 
 #define writeb(v,c)		({ __iowmb(); writeb_relaxed((v),(c)); })
 #define writew(v,c)		({ __iowmb(); writew_relaxed((v),(c)); })
@@ -183,9 +198,9 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
 /*
  * io{read,write}{16,32,64}be() macros
  */
-#define ioread16be(p)		({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; })
-#define ioread32be(p)		({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; })
-#define ioread64be(p)		({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(); __v; })
+#define ioread16be(p)		({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(__v); __v; })
+#define ioread32be(p)		({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(__v); __v; })
+#define ioread64be(p)		({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(__v); __v; })
 
 #define iowrite16be(v,p)	({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); })
 #define iowrite32be(v,p)	({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); })
-- 
cgit 


From 9eb1c92b47c73249465d388eaa394fe436a3b489 Mon Sep 17 00:00:00 2001
From: Jeremy Linton <jeremy.linton@arm.com>
Date: Tue, 27 Nov 2018 17:59:12 +0000
Subject: arm64: acpi: Prepare for longer MADTs

The BAD_MADT_GICC_ENTRY check is a little too strict because
it rejects MADT entries that don't match the currently known
lengths. We should remove this restriction to avoid problems
if the table length changes. Future code which might depend on
additional fields should be written to validate those fields
before using them, rather than trying to globally check
known MADT version lengths.

Link: https://lkml.kernel.org/r/20181012192937.3819951-1-jeremy.linton@arm.com
Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
[lorenzo.pieralisi@arm.com: added MADT macro comments]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Al Stone <ahs3@redhat.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/acpi.h | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 709208dfdc8b..2def77ec14be 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -22,12 +22,23 @@
 #include <asm/tlbflush.h>
 
 /* Macros for consistency checks of the GICC subtable of MADT */
-#define ACPI_MADT_GICC_LENGTH	\
-	(acpi_gbl_FADT.header.revision < 6 ? 76 : 80)
+
+/*
+ * MADT GICC minimum length refers to the MADT GICC structure table length as
+ * defined in the earliest ACPI version supported on arm64, ie ACPI 5.1.
+ *
+ * The efficiency_class member was added to the
+ * struct acpi_madt_generic_interrupt to represent the MADT GICC structure
+ * "Processor Power Efficiency Class" field, added in ACPI 6.0 whose offset
+ * is therefore used to delimit the MADT GICC structure minimum length
+ * appropriately.
+ */
+#define ACPI_MADT_GICC_MIN_LENGTH   ACPI_OFFSET(  \
+	struct acpi_madt_generic_interrupt, efficiency_class)
 
 #define BAD_MADT_GICC_ENTRY(entry, end)					\
-	(!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH ||	\
-	(unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end))
+	(!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \
+	(unsigned long)(entry) + (entry)->header.length > (end))
 
 /* Basic configuration for ACPI */
 #ifdef	CONFIG_ACPI
-- 
cgit 


From 7aaf7b2fd26c3a069472dd9778367b2f941dd866 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 22 Nov 2018 09:46:45 +0100
Subject: arm64/insn: add support for emitting ADR/ADRP instructions

Add support for emitting ADR and ADRP instructions so we can switch
over our PLT generation code in a subsequent patch.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/insn.h |  8 ++++++++
 arch/arm64/kernel/insn.c      | 29 +++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index c6802dea6cab..9c01f04db64d 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -261,6 +261,11 @@ enum aarch64_insn_prfm_policy {
 	AARCH64_INSN_PRFM_POLICY_STRM,
 };
 
+enum aarch64_insn_adr_type {
+	AARCH64_INSN_ADR_TYPE_ADRP,
+	AARCH64_INSN_ADR_TYPE_ADR,
+};
+
 #define	__AARCH64_INSN_FUNCS(abbr, mask, val)	\
 static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
 { return (code & (mask)) == (val); } \
@@ -393,6 +398,9 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
 				 enum aarch64_insn_register src,
 				 int imm, enum aarch64_insn_variant variant,
 				 enum aarch64_insn_adsb_type type);
+u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
+			 enum aarch64_insn_register reg,
+			 enum aarch64_insn_adr_type type);
 u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
 			      enum aarch64_insn_register src,
 			      int immr, int imms,
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 2b3413549734..7820a4a688fa 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -1239,6 +1239,35 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
 	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
 }
 
+u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
+			 enum aarch64_insn_register reg,
+			 enum aarch64_insn_adr_type type)
+{
+	u32 insn;
+	s32 offset;
+
+	switch (type) {
+	case AARCH64_INSN_ADR_TYPE_ADR:
+		insn = aarch64_insn_get_adr_value();
+		offset = addr - pc;
+		break;
+	case AARCH64_INSN_ADR_TYPE_ADRP:
+		insn = aarch64_insn_get_adrp_value();
+		offset = (addr - ALIGN_DOWN(pc, SZ_4K)) >> 12;
+		break;
+	default:
+		pr_err("%s: unknown adr encoding %d\n", __func__, type);
+		return AARCH64_BREAK_FAULT;
+	}
+
+	if (offset < -SZ_1M || offset >= SZ_1M)
+		return AARCH64_BREAK_FAULT;
+
+	insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, reg);
+
+	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, insn, offset);
+}
+
 /*
  * Decode the imm field of a branch, and return the byte offset as a
  * signed value (so it can be used when computing a new branch
-- 
cgit 


From bdb85cd1d20669dfae813555dddb745ad09323ba Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Thu, 22 Nov 2018 09:46:46 +0100
Subject: arm64/module: switch to ADRP/ADD sequences for PLT entries

Now that we have switched to the small code model entirely, and
reduced the extended KASLR range to 4 GB, we can be sure that the
targets of relative branches that are out of range are in range
for a ADRP/ADD pair, which is one instruction shorter than our
current MOVN/MOVK/MOVK sequence, and is more idiomatic and so it
is more likely to be implemented efficiently by micro-architectures.

So switch over the ordinary PLT code and the special handling of
the Cortex-A53 ADRP errata, as well as the ftrace trampline
handling.

Reviewed-by: Torsten Duwe <duwe@lst.de>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
[will: Added a couple of comments in the plt equality check]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/module.h | 36 ++++-----------
 arch/arm64/kernel/ftrace.c      |  2 +-
 arch/arm64/kernel/module-plts.c | 99 ++++++++++++++++++++++++++++++++---------
 arch/arm64/kernel/module.c      |  4 +-
 4 files changed, 88 insertions(+), 53 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index f81c1847312d..905e1bb0e7bd 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -58,39 +58,19 @@ struct plt_entry {
 	 * is exactly what we are dealing with here, we are free to use x16
 	 * as a scratch register in the PLT veneers.
 	 */
-	__le32	mov0;	/* movn	x16, #0x....			*/
-	__le32	mov1;	/* movk	x16, #0x...., lsl #16		*/
-	__le32	mov2;	/* movk	x16, #0x...., lsl #32		*/
+	__le32	adrp;	/* adrp	x16, ....			*/
+	__le32	add;	/* add	x16, x16, #0x....		*/
 	__le32	br;	/* br	x16				*/
 };
 
-static inline struct plt_entry get_plt_entry(u64 val)
+static inline bool is_forbidden_offset_for_adrp(void *place)
 {
-	/*
-	 * MOVK/MOVN/MOVZ opcode:
-	 * +--------+------------+--------+-----------+-------------+---------+
-	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
-	 * +--------+------------+--------+-----------+-------------+---------+
-	 *
-	 * Rd     := 0x10 (x16)
-	 * hw     := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
-	 * opc    := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
-	 * sf     := 1 (64-bit variant)
-	 */
-	return (struct plt_entry){
-		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
-		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
-		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
-		cpu_to_le32(0xd61f0200)
-	};
+	return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
+	       cpus_have_const_cap(ARM64_WORKAROUND_843419) &&
+	       ((u64)place & 0xfff) >= 0xff8;
 }
 
-static inline bool plt_entries_equal(const struct plt_entry *a,
-				     const struct plt_entry *b)
-{
-	return a->mov0 == b->mov0 &&
-	       a->mov1 == b->mov1 &&
-	       a->mov2 == b->mov2;
-}
+struct plt_entry get_plt_entry(u64 dst, void *pc);
+bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b);
 
 #endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 50986e388d2b..2135665a8ab3 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -104,7 +104,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		 * is added in the future, but for now, the pr_err() below
 		 * deals with a theoretical issue only.
 		 */
-		trampoline = get_plt_entry(addr);
+		trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline);
 		if (!plt_entries_equal(mod->arch.ftrace_trampoline,
 				       &trampoline)) {
 			if (!plt_entries_equal(mod->arch.ftrace_trampoline,
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index a0efe30211d6..255941394941 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -11,6 +11,61 @@
 #include <linux/module.h>
 #include <linux/sort.h>
 
+static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc,
+					    enum aarch64_insn_register reg)
+{
+	u32 adrp, add;
+
+	adrp = aarch64_insn_gen_adr(pc, dst, reg, AARCH64_INSN_ADR_TYPE_ADRP);
+	add = aarch64_insn_gen_add_sub_imm(reg, reg, dst % SZ_4K,
+					   AARCH64_INSN_VARIANT_64BIT,
+					   AARCH64_INSN_ADSB_ADD);
+
+	return (struct plt_entry){ cpu_to_le32(adrp), cpu_to_le32(add) };
+}
+
+struct plt_entry get_plt_entry(u64 dst, void *pc)
+{
+	struct plt_entry plt;
+	static u32 br;
+
+	if (!br)
+		br = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_16,
+						 AARCH64_INSN_BRANCH_NOLINK);
+
+	plt = __get_adrp_add_pair(dst, (u64)pc, AARCH64_INSN_REG_16);
+	plt.br = cpu_to_le32(br);
+
+	return plt;
+}
+
+bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b)
+{
+	u64 p, q;
+
+	/*
+	 * Check whether both entries refer to the same target:
+	 * do the cheapest checks first.
+	 * If the 'add' or 'br' opcodes are different, then the target
+	 * cannot be the same.
+	 */
+	if (a->add != b->add || a->br != b->br)
+		return false;
+
+	p = ALIGN_DOWN((u64)a, SZ_4K);
+	q = ALIGN_DOWN((u64)b, SZ_4K);
+
+	/*
+	 * If the 'adrp' opcodes are the same then we just need to check
+	 * that they refer to the same 4k region.
+	 */
+	if (a->adrp == b->adrp && p == q)
+		return true;
+
+	return (p + aarch64_insn_adrp_get_offset(le32_to_cpu(a->adrp))) ==
+	       (q + aarch64_insn_adrp_get_offset(le32_to_cpu(b->adrp)));
+}
+
 static bool in_init(const struct module *mod, void *loc)
 {
 	return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
@@ -24,19 +79,23 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
 							  &mod->arch.init;
 	struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
 	int i = pltsec->plt_num_entries;
+	int j = i - 1;
 	u64 val = sym->st_value + rela->r_addend;
 
-	plt[i] = get_plt_entry(val);
+	if (is_forbidden_offset_for_adrp(&plt[i].adrp))
+		i++;
+
+	plt[i] = get_plt_entry(val, &plt[i]);
 
 	/*
 	 * Check if the entry we just created is a duplicate. Given that the
 	 * relocations are sorted, this will be the last entry we allocated.
 	 * (if one exists).
 	 */
-	if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
-		return (u64)&plt[i - 1];
+	if (j >= 0 && plt_entries_equal(plt + i, plt + j))
+		return (u64)&plt[j];
 
-	pltsec->plt_num_entries++;
+	pltsec->plt_num_entries += i - j;
 	if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries))
 		return 0;
 
@@ -51,35 +110,24 @@ u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
 							  &mod->arch.init;
 	struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
 	int i = pltsec->plt_num_entries++;
-	u32 mov0, mov1, mov2, br;
+	u32 br;
 	int rd;
 
 	if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries))
 		return 0;
 
+	if (is_forbidden_offset_for_adrp(&plt[i].adrp))
+		i = pltsec->plt_num_entries++;
+
 	/* get the destination register of the ADRP instruction */
 	rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD,
 					  le32_to_cpup((__le32 *)loc));
 
-	/* generate the veneer instructions */
-	mov0 = aarch64_insn_gen_movewide(rd, (u16)~val, 0,
-					 AARCH64_INSN_VARIANT_64BIT,
-					 AARCH64_INSN_MOVEWIDE_INVERSE);
-	mov1 = aarch64_insn_gen_movewide(rd, (u16)(val >> 16), 16,
-					 AARCH64_INSN_VARIANT_64BIT,
-					 AARCH64_INSN_MOVEWIDE_KEEP);
-	mov2 = aarch64_insn_gen_movewide(rd, (u16)(val >> 32), 32,
-					 AARCH64_INSN_VARIANT_64BIT,
-					 AARCH64_INSN_MOVEWIDE_KEEP);
 	br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4,
 					 AARCH64_INSN_BRANCH_NOLINK);
 
-	plt[i] = (struct plt_entry){
-			cpu_to_le32(mov0),
-			cpu_to_le32(mov1),
-			cpu_to_le32(mov2),
-			cpu_to_le32(br)
-		};
+	plt[i] = __get_adrp_add_pair(val, (u64)&plt[i], rd);
+	plt[i].br = cpu_to_le32(br);
 
 	return (u64)&plt[i];
 }
@@ -195,6 +243,15 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
 			break;
 		}
 	}
+
+	if (IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
+	    cpus_have_const_cap(ARM64_WORKAROUND_843419))
+		/*
+		 * Add some slack so we can skip PLT slots that may trigger
+		 * the erratum due to the placement of the ADRP instruction.
+		 */
+		ret += DIV_ROUND_UP(ret, (SZ_4K / sizeof(struct plt_entry)));
+
 	return ret;
 }
 
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index c2abe59c6f8f..f713e2fc4d75 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -203,9 +203,7 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
 {
 	u32 insn;
 
-	if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) ||
-	    !cpus_have_const_cap(ARM64_WORKAROUND_843419) ||
-	    ((u64)place & 0xfff) < 0xff8)
+	if (!is_forbidden_offset_for_adrp(place))
 		return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
 				      AARCH64_INSN_IMM_ADR);
 
-- 
cgit 


From 3d65b6bbc01ecece8142e62a8a5f1d48ba41a240 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 19 Nov 2018 18:08:49 +0000
Subject: arm64: tlbi: Set MAX_TLBI_OPS to PTRS_PER_PTE

In order to reduce the possibility of soft lock-ups, we bound the
maximum number of TLBI operations performed by a single call to
flush_tlb_range() to an arbitrary constant of 1024.

Whilst this does the job of avoiding lock-ups, we can actually be a bit
smarter by defining this as PTRS_PER_PTE. Due to the structure of our
page tables, using PTRS_PER_PTE means that an outer loop calling
flush_tlb_range() for entire table entries will end up performing just a
single TLBI operation for each entry. As an example, mremap()ing a 1GB
range mapped using 4k pages now requires only 512 TLBI operations when
moving the page tables as opposed to 262144 operations (512*512) when
using the current threshold of 1024.

Cc: Joel Fernandes <joel@joelfernandes.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/tlbflush.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index a629a4067aae..bb4507a11b1b 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -186,7 +186,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
  * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
  * necessarily a performance improvement.
  */
-#define MAX_TLBI_OPS	1024UL
+#define MAX_TLBI_OPS	PTRS_PER_PTE
 
 static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long start, unsigned long end,
@@ -195,7 +195,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 	unsigned long asid = ASID(vma->vm_mm);
 	unsigned long addr;
 
-	if ((end - start) > (MAX_TLBI_OPS * stride)) {
+	if ((end - start) >= (MAX_TLBI_OPS * stride)) {
 		flush_tlb_mm(vma->vm_mm);
 		return;
 	}
-- 
cgit 


From 1b57ec8c75279b873639eb44a215479236f93481 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 29 Nov 2018 16:31:04 +0000
Subject: arm64: io: Ensure value passed to __iormb() is held in a 64-bit
 register

As of commit 6460d3201471 ("arm64: io: Ensure calls to delay routines
are ordered against prior readX()"), MMIO reads smaller than 64 bits
fail to compile under clang because we end up mixing 32-bit and 64-bit
register operands for the same data processing instruction:

./include/asm-generic/io.h:695:9: warning: value size does not match register size specified by the constraint and modifier [-Wasm-operand-widths]
        return readb(addr);
               ^
./arch/arm64/include/asm/io.h:147:58: note: expanded from macro 'readb'
                                                                       ^
./include/asm-generic/io.h:695:9: note: use constraint modifier "w"
./arch/arm64/include/asm/io.h:147:50: note: expanded from macro 'readb'
                                                               ^
./arch/arm64/include/asm/io.h:118:24: note: expanded from macro '__iormb'
        asm volatile("eor       %0, %1, %1\n"                           \
                                    ^

Fix the build by casting the macro argument to 'unsigned long' when used
as an input to the inline asm.

Reported-by: Nick Desaulniers <nick.desaulniers@gmail.com>
Reported-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/io.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index d42d00d8d5b6..ee723835c1f4 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -117,7 +117,8 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
 	 */								\
 	asm volatile("eor	%0, %1, %1\n"				\
 		     "cbnz	%0, ."					\
-		     : "=r" (tmp) : "r" (v) : "memory");		\
+		     : "=r" (tmp) : "r" ((unsigned long)(v))		\
+		     : "memory");					\
 })
 
 #define __iowmb()		wmb()
-- 
cgit 


From 5c176aff5b5a7027840c37da9d48a8f9cedb08b9 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Thu, 15 Nov 2018 22:42:00 +0000
Subject: arm64: ftrace: enable graph FP test

The core frace code has an optional sanity check on the frame pointer
passed by ftrace_graph_caller and return_to_handler. This is cheap,
useful, and enabled unconditionally on x86, sparc, and riscv.

Let's do the same on arm64, so that we can catch any problems early.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Torsten Duwe <duwe@suse.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/ftrace.h  | 1 +
 arch/arm64/kernel/entry-ftrace.S | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index caa955f10e19..6795c147cbcc 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -13,6 +13,7 @@
 
 #include <asm/insn.h>
 
+#define HAVE_FUNCTION_GRAPH_FP_TEST
 #define MCOUNT_ADDR		((unsigned long)_mcount)
 #define MCOUNT_INSN_SIZE	AARCH64_INSN_SIZE
 
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index a3045fa04bd0..bb549f71948d 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -207,8 +207,7 @@ ENDPROC(ftrace_graph_caller)
  * void return_to_handler(void)
  *
  * Run ftrace_return_to_handler() before going back to parent.
- * @fp is checked against the value passed by ftrace_graph_caller()
- * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
+ * @fp is checked against the value passed by ftrace_graph_caller().
  */
 ENTRY(return_to_handler)
 	save_return_regs
-- 
cgit 


From c9460dcb06ee68af1c75f9232603ece071901abe Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Fri, 30 Nov 2018 17:18:00 +0000
Subject: arm64: capabilities: Merge entries for ARM64_WORKAROUND_CLEAN_CACHE

We have two entries for ARM64_WORKAROUND_CLEAN_CACHE capability :

1) ARM Errata 826319, 827319, 824069, 819472 on A53 r0p[012]
2) ARM Errata 819472 on A53 r0p[01]

Both have the same work around. Merge these entries to avoid
duplicate entries for a single capability. Add a new Kconfig
entry to control the "capability" entry to make it easier
to handle combinations of the CONFIGs.

Cc: Will Deacon <will.deacon@arm.com>
Cc: Andre Przywara <andre.przywara@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Kconfig               |  7 +++++++
 arch/arm64/include/asm/cputype.h |  1 +
 arch/arm64/kernel/cpu_errata.c   | 28 ++++++++++++++++------------
 3 files changed, 24 insertions(+), 12 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index bf57c48c77df..6d2b25f51bb3 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -313,9 +313,13 @@ menu "Kernel Features"
 
 menu "ARM errata workarounds via the alternatives framework"
 
+config ARM64_WORKAROUND_CLEAN_CACHE
+	def_bool n
+
 config ARM64_ERRATUM_826319
 	bool "Cortex-A53: 826319: System might deadlock if a write cannot complete until read data is accepted"
 	default y
+	select ARM64_WORKAROUND_CLEAN_CACHE
 	help
 	  This option adds an alternative code sequence to work around ARM
 	  erratum 826319 on Cortex-A53 parts up to r0p2 with an AMBA 4 ACE or
@@ -337,6 +341,7 @@ config ARM64_ERRATUM_826319
 config ARM64_ERRATUM_827319
 	bool "Cortex-A53: 827319: Data cache clean instructions might cause overlapping transactions to the interconnect"
 	default y
+	select ARM64_WORKAROUND_CLEAN_CACHE
 	help
 	  This option adds an alternative code sequence to work around ARM
 	  erratum 827319 on Cortex-A53 parts up to r0p2 with an AMBA 5 CHI
@@ -358,6 +363,7 @@ config ARM64_ERRATUM_827319
 config ARM64_ERRATUM_824069
 	bool "Cortex-A53: 824069: Cache line might not be marked as clean after a CleanShared snoop"
 	default y
+	select ARM64_WORKAROUND_CLEAN_CACHE
 	help
 	  This option adds an alternative code sequence to work around ARM
 	  erratum 824069 on Cortex-A53 parts up to r0p2 when it is connected
@@ -380,6 +386,7 @@ config ARM64_ERRATUM_824069
 config ARM64_ERRATUM_819472
 	bool "Cortex-A53: 819472: Store exclusive instructions might cause data corruption"
 	default y
+	select ARM64_WORKAROUND_CLEAN_CACHE
 	help
 	  This option adds an alternative code sequence to work around ARM
 	  erratum 819472 on Cortex-A53 parts up to r0p1 with an L2 cache
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 12f93e4d2452..2e26375bb753 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -151,6 +151,7 @@ struct midr_range {
 		.rv_max = MIDR_CPU_VAR_REV(v_max, r_max),	\
 	}
 
+#define MIDR_REV_RANGE(m, v, r_min, r_max) MIDR_RANGE(m, v, r_min, v, r_max)
 #define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf)
 
 static inline bool is_midr_in_range(u32 midr, struct midr_range const *range)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 00d93a91c4d5..dc7ab05b9400 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -570,24 +570,28 @@ static const struct midr_range arm64_harden_el2_vectors[] = {
 
 #endif
 
-const struct arm64_cpu_capabilities arm64_errata[] = {
+#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
+static const struct midr_range workaround_clean_cache[] = {
 #if	defined(CONFIG_ARM64_ERRATUM_826319) || \
 	defined(CONFIG_ARM64_ERRATUM_827319) || \
 	defined(CONFIG_ARM64_ERRATUM_824069)
-	{
-	/* Cortex-A53 r0p[012] */
-		.desc = "ARM errata 826319, 827319, 824069",
-		.capability = ARM64_WORKAROUND_CLEAN_CACHE,
-		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 2),
-		.cpu_enable = cpu_enable_cache_maint_trap,
-	},
+	/* Cortex-A53 r0p[012]: ARM errata 826319, 827319, 824069 */
+	MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 2),
+#endif
+#ifdef	CONFIG_ARM64_ERRATUM_819472
+	/* Cortex-A53 r0p[01] : ARM errata 819472 */
+	MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 1),
 #endif
-#ifdef CONFIG_ARM64_ERRATUM_819472
+	{},
+};
+#endif
+
+const struct arm64_cpu_capabilities arm64_errata[] = {
+#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
 	{
-	/* Cortex-A53 r0p[01] */
-		.desc = "ARM errata 819472",
+		.desc = "ARM errata 826319, 827319, 824069, 819472",
 		.capability = ARM64_WORKAROUND_CLEAN_CACHE,
-		ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 1),
+		ERRATA_MIDR_RANGE_LIST(workaround_clean_cache),
 		.cpu_enable = cpu_enable_cache_maint_trap,
 	},
 #endif
-- 
cgit 


From f58cdf7e3cab33306efd999c23b4fb606184abf3 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Fri, 30 Nov 2018 17:18:01 +0000
Subject: arm64: capabilities: Merge duplicate Cavium erratum entries

Merge duplicate entries for a single capability using the midr
range list for Cavium errata 30115 and 27456.

Cc: Andrew Pinski <apinski@cavium.com>
Cc: David Daney <david.daney@cavium.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cputype.h |  1 +
 arch/arm64/kernel/cpu_errata.c   | 50 +++++++++++++++++++---------------------
 2 files changed, 25 insertions(+), 26 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 2e26375bb753..951ed1a4e5c9 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -152,6 +152,7 @@ struct midr_range {
 	}
 
 #define MIDR_REV_RANGE(m, v, r_min, r_max) MIDR_RANGE(m, v, r_min, v, r_max)
+#define MIDR_REV(m, v, r) MIDR_RANGE(m, v, r, v, r)
 #define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf)
 
 static inline bool is_midr_in_range(u32 midr, struct midr_range const *range)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index dc7ab05b9400..51f70f60fe3b 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -570,6 +570,28 @@ static const struct midr_range arm64_harden_el2_vectors[] = {
 
 #endif
 
+#ifdef CONFIG_CAVIUM_ERRATUM_27456
+static const struct midr_range cavium_erratum_27456_cpus[] = {
+	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
+	MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1),
+	/* Cavium ThunderX, T81 pass 1.0 */
+	MIDR_REV(MIDR_THUNDERX_81XX, 0, 0),
+	{},
+};
+#endif
+
+#ifdef CONFIG_CAVIUM_ERRATUM_30115
+static const struct midr_range cavium_erratum_30115_cpus[] = {
+	/* Cavium ThunderX, T88 pass 1.x - 2.2 */
+	MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 2),
+	/* Cavium ThunderX, T81 pass 1.0 - 1.2 */
+	MIDR_REV_RANGE(MIDR_THUNDERX_81XX, 0, 0, 2),
+	/* Cavium ThunderX, T83 pass 1.0 */
+	MIDR_REV(MIDR_THUNDERX_83XX, 0, 0),
+	{},
+};
+#endif
+
 #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
 static const struct midr_range workaround_clean_cache[] = {
 #if	defined(CONFIG_ARM64_ERRATUM_826319) || \
@@ -642,40 +664,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_27456
 	{
-	/* Cavium ThunderX, T88 pass 1.x - 2.1 */
 		.desc = "Cavium erratum 27456",
 		.capability = ARM64_WORKAROUND_CAVIUM_27456,
-		ERRATA_MIDR_RANGE(MIDR_THUNDERX,
-				  0, 0,
-				  1, 1),
-	},
-	{
-	/* Cavium ThunderX, T81 pass 1.0 */
-		.desc = "Cavium erratum 27456",
-		.capability = ARM64_WORKAROUND_CAVIUM_27456,
-		ERRATA_MIDR_REV(MIDR_THUNDERX_81XX, 0, 0),
+		ERRATA_MIDR_RANGE_LIST(cavium_erratum_27456_cpus),
 	},
 #endif
 #ifdef CONFIG_CAVIUM_ERRATUM_30115
 	{
-	/* Cavium ThunderX, T88 pass 1.x - 2.2 */
-		.desc = "Cavium erratum 30115",
-		.capability = ARM64_WORKAROUND_CAVIUM_30115,
-		ERRATA_MIDR_RANGE(MIDR_THUNDERX,
-				      0, 0,
-				      1, 2),
-	},
-	{
-	/* Cavium ThunderX, T81 pass 1.0 - 1.2 */
-		.desc = "Cavium erratum 30115",
-		.capability = ARM64_WORKAROUND_CAVIUM_30115,
-		ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX_81XX, 0, 0, 2),
-	},
-	{
-	/* Cavium ThunderX, T83 pass 1.0 */
 		.desc = "Cavium erratum 30115",
 		.capability = ARM64_WORKAROUND_CAVIUM_30115,
-		ERRATA_MIDR_REV(MIDR_THUNDERX_83XX, 0, 0),
+		ERRATA_MIDR_RANGE_LIST(cavium_erratum_30115_cpus),
 	},
 #endif
 	{
-- 
cgit 


From f56063c51f9fb3d9af4e7c707926964cf924b814 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Thu, 15 Nov 2018 14:52:46 +0900
Subject: arm64: add image head flag definitions

Those image head's flags will be used later by kexec_file loader.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Acked-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/image.h | 59 ++++++++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/head.S       |  3 ++-
 arch/arm64/kernel/image.h      | 21 +++++++++------
 3 files changed, 74 insertions(+), 9 deletions(-)
 create mode 100644 arch/arm64/include/asm/image.h

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/image.h b/arch/arm64/include/asm/image.h
new file mode 100644
index 000000000000..e2c27a2278e9
--- /dev/null
+++ b/arch/arm64/include/asm/image.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_IMAGE_H
+#define __ASM_IMAGE_H
+
+#define ARM64_IMAGE_MAGIC	"ARM\x64"
+
+#define ARM64_IMAGE_FLAG_BE_SHIFT		0
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT	(ARM64_IMAGE_FLAG_BE_SHIFT + 1)
+#define ARM64_IMAGE_FLAG_PHYS_BASE_SHIFT \
+					(ARM64_IMAGE_FLAG_PAGE_SIZE_SHIFT + 2)
+#define ARM64_IMAGE_FLAG_BE_MASK		0x1
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_MASK		0x3
+#define ARM64_IMAGE_FLAG_PHYS_BASE_MASK		0x1
+
+#define ARM64_IMAGE_FLAG_LE			0
+#define ARM64_IMAGE_FLAG_BE			1
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_4K		1
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_16K		2
+#define ARM64_IMAGE_FLAG_PAGE_SIZE_64K		3
+#define ARM64_IMAGE_FLAG_PHYS_BASE		1
+
+#ifndef __ASSEMBLY__
+
+#define arm64_image_flag_field(flags, field) \
+				(((flags) >> field##_SHIFT) & field##_MASK)
+
+/*
+ * struct arm64_image_header - arm64 kernel image header
+ * See Documentation/arm64/booting.txt for details
+ *
+ * @code0:		Executable code, or
+ *   @mz_header		  alternatively used for part of MZ header
+ * @code1:		Executable code
+ * @text_offset:	Image load offset
+ * @image_size:		Effective Image size
+ * @flags:		kernel flags
+ * @reserved:		reserved
+ * @magic:		Magic number
+ * @reserved5:		reserved, or
+ *   @pe_header:	  alternatively used for PE COFF offset
+ */
+
+struct arm64_image_header {
+	__le32 code0;
+	__le32 code1;
+	__le64 text_offset;
+	__le64 image_size;
+	__le64 flags;
+	__le64 res2;
+	__le64 res3;
+	__le64 res4;
+	__le32 magic;
+	__le32 res5;
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_IMAGE_H */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 4471f570a295..4d0b78c95c34 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -31,6 +31,7 @@
 #include <asm/cache.h>
 #include <asm/cputype.h>
 #include <asm/elf.h>
+#include <asm/image.h>
 #include <asm/kernel-pgtable.h>
 #include <asm/kvm_arm.h>
 #include <asm/memory.h>
@@ -91,7 +92,7 @@ _head:
 	.quad	0				// reserved
 	.quad	0				// reserved
 	.quad	0				// reserved
-	.ascii	"ARM\x64"			// Magic number
+	.ascii	ARM64_IMAGE_MAGIC		// Magic number
 #ifdef CONFIG_EFI
 	.long	pe_header - _head		// Offset to the PE header.
 
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index a820ed07fb80..d843f9cbcd92 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -15,13 +15,15 @@
  * You should have received a copy of the GNU General Public License
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
-#ifndef __ASM_IMAGE_H
-#define __ASM_IMAGE_H
+#ifndef __ARM64_KERNEL_IMAGE_H
+#define __ARM64_KERNEL_IMAGE_H
 
 #ifndef LINKER_SCRIPT
 #error This file should only be included in vmlinux.lds.S
 #endif
 
+#include <asm/image.h>
+
 /*
  * There aren't any ELF relocations we can use to endian-swap values known only
  * at link time (e.g. the subtraction of two symbol addresses), so we must get
@@ -47,19 +49,22 @@
 	sym##_lo32 = DATA_LE32((data) & 0xffffffff);		\
 	sym##_hi32 = DATA_LE32((data) >> 32)
 
+#define __HEAD_FLAG(field)	(__HEAD_FLAG_##field << \
+					ARM64_IMAGE_FLAG_##field##_SHIFT)
+
 #ifdef CONFIG_CPU_BIG_ENDIAN
-#define __HEAD_FLAG_BE		1
+#define __HEAD_FLAG_BE		ARM64_IMAGE_FLAG_BE
 #else
-#define __HEAD_FLAG_BE		0
+#define __HEAD_FLAG_BE		ARM64_IMAGE_FLAG_LE
 #endif
 
 #define __HEAD_FLAG_PAGE_SIZE	((PAGE_SHIFT - 10) / 2)
 
 #define __HEAD_FLAG_PHYS_BASE	1
 
-#define __HEAD_FLAGS		((__HEAD_FLAG_BE << 0) |	\
-				 (__HEAD_FLAG_PAGE_SIZE << 1) |	\
-				 (__HEAD_FLAG_PHYS_BASE << 3))
+#define __HEAD_FLAGS		(__HEAD_FLAG(BE)	| \
+				 __HEAD_FLAG(PAGE_SIZE) | \
+				 __HEAD_FLAG(PHYS_BASE))
 
 /*
  * These will output as part of the Image header, which should be little-endian
@@ -119,4 +124,4 @@ __efistub_screen_info		= KALLSYMS_HIDE(screen_info);
 
 #endif
 
-#endif /* __ASM_IMAGE_H */
+#endif /* __ARM64_KERNEL_IMAGE_H */
-- 
cgit 


From bdd2c9d1c33357462d1d38eb04bbb5dc452eed40 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Thu, 15 Nov 2018 14:52:47 +0900
Subject: arm64: cpufeature: add MMFR0 helper functions

Those helper functions for MMFR0 register will be used later by kexec_file
loader.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Reviewed-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cpufeature.h | 48 +++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 7e2ec64aa414..ef118d819fe8 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -486,11 +486,59 @@ static inline bool system_supports_32bit_el0(void)
 	return cpus_have_const_cap(ARM64_HAS_32BIT_EL0);
 }
 
+static inline bool system_supports_4kb_granule(void)
+{
+	u64 mmfr0;
+	u32 val;
+
+	mmfr0 =	read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+	val = cpuid_feature_extract_unsigned_field(mmfr0,
+						ID_AA64MMFR0_TGRAN4_SHIFT);
+
+	return val == ID_AA64MMFR0_TGRAN4_SUPPORTED;
+}
+
+static inline bool system_supports_64kb_granule(void)
+{
+	u64 mmfr0;
+	u32 val;
+
+	mmfr0 =	read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+	val = cpuid_feature_extract_unsigned_field(mmfr0,
+						ID_AA64MMFR0_TGRAN64_SHIFT);
+
+	return val == ID_AA64MMFR0_TGRAN64_SUPPORTED;
+}
+
+static inline bool system_supports_16kb_granule(void)
+{
+	u64 mmfr0;
+	u32 val;
+
+	mmfr0 =	read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+	val = cpuid_feature_extract_unsigned_field(mmfr0,
+						ID_AA64MMFR0_TGRAN16_SHIFT);
+
+	return val == ID_AA64MMFR0_TGRAN16_SUPPORTED;
+}
+
 static inline bool system_supports_mixed_endian_el0(void)
 {
 	return id_aa64mmfr0_mixed_endian_el0(read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1));
 }
 
+static inline bool system_supports_mixed_endian(void)
+{
+	u64 mmfr0;
+	u32 val;
+
+	mmfr0 =	read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+	val = cpuid_feature_extract_unsigned_field(mmfr0,
+						ID_AA64MMFR0_BIGENDEL_SHIFT);
+
+	return val == 0x1;
+}
+
 static inline bool system_supports_fpsimd(void)
 {
 	return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD);
-- 
cgit 


From 52b2a8af7436044cfcb27e4b0f72c2ce1f3890da Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Thu, 15 Nov 2018 14:52:49 +0900
Subject: arm64: kexec_file: load initrd and device-tree

load_other_segments() is expected to allocate and place all the necessary
memory segments other than kernel, including initrd and device-tree
blob (and elf core header for crash).
While most of the code was borrowed from kexec-tools' counterpart,
users may not be allowed to specify dtb explicitly, instead, the dtb
presented by the original boot loader is reused.

arch_kimage_kernel_post_load_cleanup() is responsible for freeing arm64-
specific data allocated in load_other_segments().

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Reviewed-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/kexec.h         |  17 +++
 arch/arm64/kernel/machine_kexec_file.c | 185 +++++++++++++++++++++++++++++++++
 2 files changed, 202 insertions(+)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index e17f0529a882..bbb5f505b0ba 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -93,6 +93,23 @@ static inline void crash_prepare_suspend(void) {}
 static inline void crash_post_resume(void) {}
 #endif
 
+#ifdef CONFIG_KEXEC_FILE
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+	void *dtb;
+	unsigned long dtb_mem;
+};
+
+struct kimage;
+
+extern int arch_kimage_file_post_load_cleanup(struct kimage *image);
+extern int load_other_segments(struct kimage *image,
+		unsigned long kernel_load_addr, unsigned long kernel_size,
+		char *initrd, unsigned long initrd_len,
+		char *cmdline);
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index c38a8048ed00..b433d947d486 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -5,12 +5,197 @@
  * Copyright (C) 2018 Linaro Limited
  * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
  *
+ * Most code is derived from arm64 port of kexec-tools
  */
 
 #define pr_fmt(fmt) "kexec_file: " fmt
 
+#include <linux/ioport.h>
+#include <linux/kernel.h>
 #include <linux/kexec.h>
+#include <linux/libfdt.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+/* relevant device tree properties */
+#define FDT_PSTR_INITRD_STA	"linux,initrd-start"
+#define FDT_PSTR_INITRD_END	"linux,initrd-end"
+#define FDT_PSTR_BOOTARGS	"bootargs"
 
 const struct kexec_file_ops * const kexec_file_loaders[] = {
 	NULL
 };
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+	vfree(image->arch.dtb);
+	image->arch.dtb = NULL;
+
+	return kexec_image_post_load_cleanup_default(image);
+}
+
+static int setup_dtb(struct kimage *image,
+		     unsigned long initrd_load_addr, unsigned long initrd_len,
+		     char *cmdline, void *dtb)
+{
+	int nodeoffset;
+	int ret;
+
+	nodeoffset = fdt_path_offset(dtb, "/chosen");
+	if (nodeoffset < 0)
+		return -EINVAL;
+
+	/* add bootargs */
+	if (cmdline) {
+		ret = fdt_setprop_string(dtb, nodeoffset, FDT_PSTR_BOOTARGS,
+							cmdline);
+		if (ret)
+			return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL);
+	} else {
+		ret = fdt_delprop(dtb, nodeoffset, FDT_PSTR_BOOTARGS);
+		if (ret && (ret != -FDT_ERR_NOTFOUND))
+			return -EINVAL;
+	}
+
+	/* add initrd-* */
+	if (initrd_load_addr) {
+		ret = fdt_setprop_u64(dtb, nodeoffset, FDT_PSTR_INITRD_STA,
+							initrd_load_addr);
+		if (ret)
+			return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL);
+
+		ret = fdt_setprop_u64(dtb, nodeoffset, FDT_PSTR_INITRD_END,
+						initrd_load_addr + initrd_len);
+		if (ret)
+			return (ret == -FDT_ERR_NOSPACE ? -ENOMEM : -EINVAL);
+	} else {
+		ret = fdt_delprop(dtb, nodeoffset, FDT_PSTR_INITRD_STA);
+		if (ret && (ret != -FDT_ERR_NOTFOUND))
+			return -EINVAL;
+
+		ret = fdt_delprop(dtb, nodeoffset, FDT_PSTR_INITRD_END);
+		if (ret && (ret != -FDT_ERR_NOTFOUND))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * More space needed so that we can add initrd and bootargs.
+ */
+#define DTB_EXTRA_SPACE 0x1000
+
+static int create_dtb(struct kimage *image,
+		      unsigned long initrd_load_addr, unsigned long initrd_len,
+		      char *cmdline, void **dtb)
+{
+	void *buf;
+	size_t buf_size;
+	int ret;
+
+	buf_size = fdt_totalsize(initial_boot_params)
+			+ strlen(cmdline) + DTB_EXTRA_SPACE;
+
+	for (;;) {
+		buf = vmalloc(buf_size);
+		if (!buf)
+			return -ENOMEM;
+
+		/* duplicate a device tree blob */
+		ret = fdt_open_into(initial_boot_params, buf, buf_size);
+		if (ret)
+			return -EINVAL;
+
+		ret = setup_dtb(image, initrd_load_addr, initrd_len,
+				cmdline, buf);
+		if (ret) {
+			vfree(buf);
+			if (ret == -ENOMEM) {
+				/* unlikely, but just in case */
+				buf_size += DTB_EXTRA_SPACE;
+				continue;
+			} else {
+				return ret;
+			}
+		}
+
+		/* trim it */
+		fdt_pack(buf);
+		*dtb = buf;
+
+		return 0;
+	}
+}
+
+int load_other_segments(struct kimage *image,
+			unsigned long kernel_load_addr,
+			unsigned long kernel_size,
+			char *initrd, unsigned long initrd_len,
+			char *cmdline)
+{
+	struct kexec_buf kbuf;
+	void *dtb = NULL;
+	unsigned long initrd_load_addr = 0, dtb_len;
+	int ret = 0;
+
+	kbuf.image = image;
+	/* not allocate anything below the kernel */
+	kbuf.buf_min = kernel_load_addr + kernel_size;
+
+	/* load initrd */
+	if (initrd) {
+		kbuf.buffer = initrd;
+		kbuf.bufsz = initrd_len;
+		kbuf.mem = 0;
+		kbuf.memsz = initrd_len;
+		kbuf.buf_align = 0;
+		/* within 1GB-aligned window of up to 32GB in size */
+		kbuf.buf_max = round_down(kernel_load_addr, SZ_1G)
+						+ (unsigned long)SZ_1G * 32;
+		kbuf.top_down = false;
+
+		ret = kexec_add_buffer(&kbuf);
+		if (ret)
+			goto out_err;
+		initrd_load_addr = kbuf.mem;
+
+		pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+				initrd_load_addr, initrd_len, initrd_len);
+	}
+
+	/* load dtb */
+	ret = create_dtb(image, initrd_load_addr, initrd_len, cmdline, &dtb);
+	if (ret) {
+		pr_err("Preparing for new dtb failed\n");
+		goto out_err;
+	}
+
+	dtb_len = fdt_totalsize(dtb);
+	kbuf.buffer = dtb;
+	kbuf.bufsz = dtb_len;
+	kbuf.mem = 0;
+	kbuf.memsz = dtb_len;
+	/* not across 2MB boundary */
+	kbuf.buf_align = SZ_2M;
+	kbuf.buf_max = ULONG_MAX;
+	kbuf.top_down = true;
+
+	ret = kexec_add_buffer(&kbuf);
+	if (ret)
+		goto out_err;
+	image->arch.dtb = dtb;
+	image->arch.dtb_mem = kbuf.mem;
+
+	pr_debug("Loaded dtb at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+			kbuf.mem, dtb_len, dtb_len);
+
+	return 0;
+
+out_err:
+	vfree(dtb);
+	return ret;
+}
-- 
cgit 


From f3b70e50942960ecc691367bb937e35cdc5e28d3 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Thu, 15 Nov 2018 14:52:50 +0900
Subject: arm64: kexec_file: allow for loading Image-format kernel

This patch provides kexec_file_ops for "Image"-format kernel. In this
implementation, a binary is always loaded with a fixed offset identified
in text_offset field of its header.

Regarding signature verification for trusted boot, this patch doesn't
contains CONFIG_KEXEC_VERIFY_SIG support, which is to be added later
in this series, but file-attribute-based verification is still a viable
option by enabling IMA security subsystem.

You can sign(label) a to-be-kexec'ed kernel image on target file system
with:
    $ evmctl ima_sign --key /path/to/private_key.pem Image

On live system, you must have IMA enforced with, at least, the following
security policy:
    "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig"

See more details about IMA here:
    https://sourceforge.net/p/linux-ima/wiki/Home/

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Reviewed-by: James Morse <james.morse@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/kexec.h         |   2 +
 arch/arm64/kernel/Makefile             |   2 +-
 arch/arm64/kernel/kexec_image.c        | 113 +++++++++++++++++++++++++++++++++
 arch/arm64/kernel/machine_kexec_file.c |   1 +
 4 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kernel/kexec_image.c

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index bbb5f505b0ba..67e4cb75d1fd 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -101,6 +101,8 @@ struct kimage_arch {
 	unsigned long dtb_mem;
 };
 
+extern const struct kexec_file_ops kexec_image_ops;
+
 struct kimage;
 
 extern int arch_kimage_file_post_load_cleanup(struct kimage *image);
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 030a39bff117..48868255f09c 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -51,7 +51,7 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE)	+= kaslr.o
 arm64-obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 arm64-obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\
 					   cpu-reset.o
-arm64-obj-$(CONFIG_KEXEC_FILE)		+= machine_kexec_file.o
+arm64-obj-$(CONFIG_KEXEC_FILE)		+= machine_kexec_file.o kexec_image.o
 arm64-obj-$(CONFIG_ARM64_RELOC_TEST)	+= arm64-reloc-test.o
 arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
 arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
new file mode 100644
index 000000000000..9f0d8b5d62d3
--- /dev/null
+++ b/arch/arm64/kernel/kexec_image.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Kexec image loader
+
+ * Copyright (C) 2018 Linaro Limited
+ * Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ */
+
+#define pr_fmt(fmt)	"kexec_file(Image): " fmt
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/kexec.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+#include <asm/cpufeature.h>
+#include <asm/image.h>
+#include <asm/memory.h>
+
+static int image_probe(const char *kernel_buf, unsigned long kernel_len)
+{
+	const struct arm64_image_header *h;
+
+	h = (const struct arm64_image_header *)(kernel_buf);
+
+	if (!h || (kernel_len < sizeof(*h)) ||
+			memcmp(&h->magic, ARM64_IMAGE_MAGIC,
+				sizeof(h->magic)))
+		return -EINVAL;
+
+	return 0;
+}
+
+static void *image_load(struct kimage *image,
+				char *kernel, unsigned long kernel_len,
+				char *initrd, unsigned long initrd_len,
+				char *cmdline, unsigned long cmdline_len)
+{
+	struct arm64_image_header *h;
+	u64 flags, value;
+	bool be_image, be_kernel;
+	struct kexec_buf kbuf;
+	unsigned long text_offset;
+	struct kexec_segment *kernel_segment;
+	int ret;
+
+	/*
+	 * We require a kernel with an unambiguous Image header. Per
+	 * Documentation/booting.txt, this is the case when image_size
+	 * is non-zero (practically speaking, since v3.17).
+	 */
+	h = (struct arm64_image_header *)kernel;
+	if (!h->image_size)
+		return ERR_PTR(-EINVAL);
+
+	/* Check cpu features */
+	flags = le64_to_cpu(h->flags);
+	be_image = arm64_image_flag_field(flags, ARM64_IMAGE_FLAG_BE);
+	be_kernel = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+	if ((be_image != be_kernel) && !system_supports_mixed_endian())
+		return ERR_PTR(-EINVAL);
+
+	value = arm64_image_flag_field(flags, ARM64_IMAGE_FLAG_PAGE_SIZE);
+	if (((value == ARM64_IMAGE_FLAG_PAGE_SIZE_4K) &&
+			!system_supports_4kb_granule()) ||
+	    ((value == ARM64_IMAGE_FLAG_PAGE_SIZE_64K) &&
+			!system_supports_64kb_granule()) ||
+	    ((value == ARM64_IMAGE_FLAG_PAGE_SIZE_16K) &&
+			!system_supports_16kb_granule()))
+		return ERR_PTR(-EINVAL);
+
+	/* Load the kernel */
+	kbuf.image = image;
+	kbuf.buf_min = 0;
+	kbuf.buf_max = ULONG_MAX;
+	kbuf.top_down = false;
+
+	kbuf.buffer = kernel;
+	kbuf.bufsz = kernel_len;
+	kbuf.mem = 0;
+	kbuf.memsz = le64_to_cpu(h->image_size);
+	text_offset = le64_to_cpu(h->text_offset);
+	kbuf.buf_align = MIN_KIMG_ALIGN;
+
+	/* Adjust kernel segment with TEXT_OFFSET */
+	kbuf.memsz += text_offset;
+
+	ret = kexec_add_buffer(&kbuf);
+	if (ret)
+		return ERR_PTR(ret);
+
+	kernel_segment = &image->segment[image->nr_segments - 1];
+	kernel_segment->mem += text_offset;
+	kernel_segment->memsz -= text_offset;
+	image->start = kernel_segment->mem;
+
+	pr_debug("Loaded kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+				kernel_segment->mem, kbuf.bufsz,
+				kernel_segment->memsz);
+
+	/* Load additional data */
+	ret = load_other_segments(image,
+				kernel_segment->mem, kernel_segment->memsz,
+				initrd, initrd_len, cmdline);
+
+	return ERR_PTR(ret);
+}
+
+const struct kexec_file_ops kexec_image_ops = {
+	.probe = image_probe,
+	.load = image_load,
+};
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index b433d947d486..7e9d5ed3e238 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -26,6 +26,7 @@
 #define FDT_PSTR_BOOTARGS	"bootargs"
 
 const struct kexec_file_ops * const kexec_file_loaders[] = {
+	&kexec_image_ops,
 	NULL
 };
 
-- 
cgit 


From 0b587c84e42151fc5a636c7cebf7b03b281dc672 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Fri, 30 Nov 2018 17:18:06 +0000
Subject: arm64: capabilities: Batch cpu_enable callbacks

We use a stop_machine call for each available capability to
enable it on all the CPUs available at boot time. Instead
we could batch the cpu_enable callbacks to a single stop_machine()
call to save us some time.

Reviewed-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cpufeature.h |  3 ++
 arch/arm64/kernel/cpufeature.c      | 70 +++++++++++++++++++++++--------------
 2 files changed, 47 insertions(+), 26 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 7e2ec64aa414..0a15e2c55f1b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -357,6 +357,9 @@ extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
 extern struct static_key_false arm64_const_caps_ready;
 
+#define for_each_available_cap(cap)		\
+	for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS)
+
 bool this_cpu_has_cap(unsigned int cap);
 
 static inline bool cpu_have_feature(unsigned int num)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 4b5cceaecf6e..1aeb62500fd4 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1531,11 +1531,27 @@ static void update_cpu_capabilities(u16 scope_mask)
 	}
 }
 
-static int __enable_cpu_capability(void *arg)
+/*
+ * Enable all the available capabilities on this CPU. The capabilities
+ * with BOOT_CPU scope are handled separately and hence skipped here.
+ */
+static int cpu_enable_non_boot_scope_capabilities(void *__unused)
 {
-	const struct arm64_cpu_capabilities *cap = arg;
+	int i;
+	u16 non_boot_scope = SCOPE_ALL & ~SCOPE_BOOT_CPU;
+
+	for_each_available_cap(i) {
+		const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[i];
+
+		if (WARN_ON(!cap))
+			continue;
+
+		if (!(cap->type & non_boot_scope))
+			continue;
 
-	cap->cpu_enable(cap);
+		if (cap->cpu_enable)
+			cap->cpu_enable(cap);
+	}
 	return 0;
 }
 
@@ -1543,21 +1559,29 @@ static int __enable_cpu_capability(void *arg)
  * Run through the enabled capabilities and enable() it on all active
  * CPUs
  */
-static void __init
-__enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
-			  u16 scope_mask)
+static void __init enable_cpu_capabilities(u16 scope_mask)
 {
+	int i;
+	const struct arm64_cpu_capabilities *caps;
+	bool boot_scope;
+
 	scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
-	for (; caps->matches; caps++) {
-		unsigned int num = caps->capability;
+	boot_scope = !!(scope_mask & SCOPE_BOOT_CPU);
+
+	for (i = 0; i < ARM64_NCAPS; i++) {
+		unsigned int num;
 
-		if (!(caps->type & scope_mask) || !cpus_have_cap(num))
+		caps = cpu_hwcaps_ptrs[i];
+		if (!caps || !(caps->type & scope_mask))
+			continue;
+		num = caps->capability;
+		if (!cpus_have_cap(num))
 			continue;
 
 		/* Ensure cpus_have_const_cap(num) works */
 		static_branch_enable(&cpu_hwcap_keys[num]);
 
-		if (caps->cpu_enable) {
+		if (boot_scope && caps->cpu_enable)
 			/*
 			 * Capabilities with SCOPE_BOOT_CPU scope are finalised
 			 * before any secondary CPU boots. Thus, each secondary
@@ -1566,25 +1590,19 @@ __enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			 * the boot CPU, for which the capability must be
 			 * enabled here. This approach avoids costly
 			 * stop_machine() calls for this case.
-			 *
-			 * Otherwise, use stop_machine() as it schedules the
-			 * work allowing us to modify PSTATE, instead of
-			 * on_each_cpu() which uses an IPI, giving us a PSTATE
-			 * that disappears when we return.
 			 */
-			if (scope_mask & SCOPE_BOOT_CPU)
-				caps->cpu_enable(caps);
-			else
-				stop_machine(__enable_cpu_capability,
-					     (void *)caps, cpu_online_mask);
-		}
+			caps->cpu_enable(caps);
 	}
-}
 
-static void __init enable_cpu_capabilities(u16 scope_mask)
-{
-	__enable_cpu_capabilities(arm64_errata, scope_mask);
-	__enable_cpu_capabilities(arm64_features, scope_mask);
+	/*
+	 * For all non-boot scope capabilities, use stop_machine()
+	 * as it schedules the work allowing us to modify PSTATE,
+	 * instead of on_each_cpu() which uses an IPI, giving us a
+	 * PSTATE that disappears when we return.
+	 */
+	if (!boot_scope)
+		stop_machine(cpu_enable_non_boot_scope_capabilities,
+			     NULL, cpu_online_mask);
 }
 
 /*
-- 
cgit 


From bd4fb6d270bc423a9a4098108784f7f9254c4e6d Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 14 Jun 2018 11:21:34 +0100
Subject: arm64: Add support for SB barrier and patch in over DSB; ISB
 sequences

We currently use a DSB; ISB sequence to inhibit speculation in set_fs().
Whilst this works for current CPUs, future CPUs may implement a new SB
barrier instruction which acts as an architected speculation barrier.

On CPUs that support it, patch in an SB; NOP sequence over the DSB; ISB
sequence and advertise the presence of the new instruction to userspace.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/assembler.h  | 13 +++++++++++++
 arch/arm64/include/asm/barrier.h    |  4 ++++
 arch/arm64/include/asm/cpucaps.h    |  3 ++-
 arch/arm64/include/asm/sysreg.h     |  6 ++++++
 arch/arm64/include/asm/uaccess.h    |  3 +--
 arch/arm64/include/uapi/asm/hwcap.h |  1 +
 arch/arm64/kernel/cpufeature.c      | 12 ++++++++++++
 arch/arm64/kernel/cpuinfo.c         |  1 +
 8 files changed, 40 insertions(+), 3 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 6142402c2eb4..953208267252 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -122,6 +122,19 @@
 	hint	#20
 	.endm
 
+/*
+ * Speculation barrier
+ */
+	.macro	sb
+alternative_if_not ARM64_HAS_SB
+	dsb	nsh
+	isb
+alternative_else
+	SB_BARRIER_INSN
+	nop
+alternative_endif
+	.endm
+
 /*
  * Sanitise a 64-bit bounded index wrt speculation, returning zero if out
  * of bounds.
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 822a9192c551..f66bb04fdf2d 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -34,6 +34,10 @@
 #define psb_csync()	asm volatile("hint #17" : : : "memory")
 #define csdb()		asm volatile("hint #20" : : : "memory")
 
+#define spec_bar()	asm volatile(ALTERNATIVE("dsb nsh\nisb\n",		\
+						 SB_BARRIER_INSN"nop\n",	\
+						 ARM64_HAS_SB))
+
 #define mb()		dsb(sy)
 #define rmb()		dsb(ld)
 #define wmb()		dsb(st)
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 6e2d254c09eb..b7f0709a21af 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -54,7 +54,8 @@
 #define ARM64_HAS_CRC32				33
 #define ARM64_SSBS				34
 #define ARM64_WORKAROUND_1188873		35
+#define ARM64_HAS_SB				36
 
-#define ARM64_NCAPS				36
+#define ARM64_NCAPS				37
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 0c909c4a932f..78f15858535e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -104,6 +104,11 @@
 #define SET_PSTATE_UAO(x)		__emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift))
 #define SET_PSTATE_SSBS(x)		__emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift))
 
+#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
+	__emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
+
+#define SB_BARRIER_INSN			__SYS_BARRIER_INSN(0, 7, 31)
+
 #define SYS_DC_ISW			sys_insn(1, 0, 7, 6, 2)
 #define SYS_DC_CSW			sys_insn(1, 0, 7, 10, 2)
 #define SYS_DC_CISW			sys_insn(1, 0, 7, 14, 2)
@@ -528,6 +533,7 @@
 #define ID_AA64ISAR0_AES_SHIFT		4
 
 /* id_aa64isar1 */
+#define ID_AA64ISAR1_SB_SHIFT		36
 #define ID_AA64ISAR1_LRCPC_SHIFT	20
 #define ID_AA64ISAR1_FCMA_SHIFT		16
 #define ID_AA64ISAR1_JSCVT_SHIFT	12
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 07c34087bd5e..fad33f5fde47 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -45,8 +45,7 @@ static inline void set_fs(mm_segment_t fs)
 	 * Prevent a mispredicted conditional call to set_fs from forwarding
 	 * the wrong address limit to access_ok under speculation.
 	 */
-	dsb(nsh);
-	isb();
+	spec_bar();
 
 	/* On user-mode return, check fs is correct */
 	set_thread_flag(TIF_FSCHECK);
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 2bcd6e4f3474..7784f7cba16c 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -49,5 +49,6 @@
 #define HWCAP_ILRCPC		(1 << 26)
 #define HWCAP_FLAGM		(1 << 27)
 #define HWCAP_SSBS		(1 << 28)
+#define HWCAP_SB		(1 << 29)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 1aeb62500fd4..e6467e64ee91 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -142,6 +142,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
+	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
@@ -1398,6 +1399,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = cpu_enable_cnp,
 	},
 #endif
+	{
+		.desc = "Speculation barrier (SB)",
+		.capability = ARM64_HAS_SB,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_cpuid_feature,
+		.sys_reg = SYS_ID_AA64ISAR1_EL1,
+		.field_pos = ID_AA64ISAR1_SB_SHIFT,
+		.sign = FTR_UNSIGNED,
+		.min_field_value = 1,
+	},
 	{},
 };
 
@@ -1439,6 +1450,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC),
 	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC),
+	HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SB),
 	HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT),
 #ifdef CONFIG_ARM64_SVE
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE),
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index bcc2831399cb..7cb0b08ab0a7 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -82,6 +82,7 @@ static const char *const hwcap_str[] = {
 	"ilrcpc",
 	"flagm",
 	"ssbs",
+	"sb",
 	NULL
 };
 
-- 
cgit 


From 21e28547f613e7ba4f4cb6831a3ead2d723fdf7b Mon Sep 17 00:00:00 2001
From: Jackie Liu <liuyun01@kylinos.cn>
Date: Tue, 4 Dec 2018 09:43:22 +0800
Subject: arm64/neon: add workaround for ambiguous C99 stdint.h types

In a way similar to ARM commit 09096f6a0ee2 ("ARM: 7822/1: add workaround
for ambiguous C99 stdint.h types"), this patch redefines the macros that
are used in stdint.h so its definitions of uint64_t and int64_t are
compatible with those of the kernel.

This patch comes from: https://patchwork.kernel.org/patch/3540001/
Wrote by: Ard Biesheuvel <ard.biesheuvel@linaro.org>

We mark this file as a private file and don't have to override asm/types.h

Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/neon-intrinsics.h | 39 ++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 arch/arm64/include/asm/neon-intrinsics.h

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/neon-intrinsics.h b/arch/arm64/include/asm/neon-intrinsics.h
new file mode 100644
index 000000000000..2ba6c6b9541f
--- /dev/null
+++ b/arch/arm64/include/asm/neon-intrinsics.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2018 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_NEON_INTRINSICS_H
+#define __ASM_NEON_INTRINSICS_H
+
+#include <asm-generic/int-ll64.h>
+
+/*
+ * In the kernel, u64/s64 are [un]signed long long, not [un]signed long.
+ * So by redefining these macros to the former, we can force gcc-stdint.h
+ * to define uint64_t / in64_t in a compatible manner.
+ */
+
+#ifdef __INT64_TYPE__
+#undef __INT64_TYPE__
+#define __INT64_TYPE__		long long
+#endif
+
+#ifdef __UINT64_TYPE__
+#undef __UINT64_TYPE__
+#define __UINT64_TYPE__		unsigned long long
+#endif
+
+/*
+ * genksyms chokes on the ARM NEON instrinsics system header, but we
+ * don't export anything it defines anyway, so just disregard when
+ * genksyms execute.
+ */
+#ifndef __GENKSYMS__
+#include <arm_neon.h>
+#endif
+
+#endif /* __ASM_NEON_INTRINSICS_H */
-- 
cgit 


From cc9f8349cb33965120a96c12e05d00676162eb7f Mon Sep 17 00:00:00 2001
From: Jackie Liu <liuyun01@kylinos.cn>
Date: Tue, 4 Dec 2018 09:43:23 +0800
Subject: arm64: crypto: add NEON accelerated XOR implementation

This is a NEON acceleration method that can improve
performance by approximately 20%. I got the following
data from the centos 7.5 on Huawei's HISI1616 chip:

[ 93.837726] xor: measuring software checksum speed
[ 93.874039]   8regs  : 7123.200 MB/sec
[ 93.914038]   32regs : 7180.300 MB/sec
[ 93.954043]   arm64_neon: 9856.000 MB/sec
[ 93.954047] xor: using function: arm64_neon (9856.000 MB/sec)

I believe this code can bring some optimization for
all arm64 platform. thanks for Ard Biesheuvel's suggestions.

Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/Kbuild |   1 -
 arch/arm64/include/asm/xor.h  |  73 +++++++++++++++++
 arch/arm64/lib/Makefile       |   6 ++
 arch/arm64/lib/xor-neon.c     | 184 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 263 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/include/asm/xor.h
 create mode 100644 arch/arm64/lib/xor-neon.c

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 6cd5d77b6b44..1877f29f6d97 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -27,4 +27,3 @@ generic-y += trace_clock.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
-generic-y += xor.h
diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h
new file mode 100644
index 000000000000..856386ad076c
--- /dev/null
+++ b/arch/arm64/include/asm/xor.h
@@ -0,0 +1,73 @@
+/*
+ * arch/arm64/include/asm/xor.h
+ *
+ * Authors: Jackie Liu <liuyun01@kylinos.cn>
+ * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+extern struct xor_block_template const xor_block_inner_neon;
+
+static void
+xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+	kernel_neon_begin();
+	xor_block_inner_neon.do_2(bytes, p1, p2);
+	kernel_neon_end();
+}
+
+static void
+xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+		unsigned long *p3)
+{
+	kernel_neon_begin();
+	xor_block_inner_neon.do_3(bytes, p1, p2, p3);
+	kernel_neon_end();
+}
+
+static void
+xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+		unsigned long *p3, unsigned long *p4)
+{
+	kernel_neon_begin();
+	xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
+	kernel_neon_end();
+}
+
+static void
+xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+		unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+	kernel_neon_begin();
+	xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
+	kernel_neon_end();
+}
+
+static struct xor_block_template xor_block_arm64 = {
+	.name   = "arm64_neon",
+	.do_2   = xor_neon_2,
+	.do_3   = xor_neon_3,
+	.do_4   = xor_neon_4,
+	.do_5	= xor_neon_5
+};
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES           \
+	do {        \
+		xor_speed(&xor_block_8regs);    \
+		xor_speed(&xor_block_32regs);    \
+		if (cpu_has_neon()) { \
+			xor_speed(&xor_block_arm64);\
+		} \
+	} while (0)
+
+#endif /* ! CONFIG_KERNEL_MODE_NEON */
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 69ff9887f724..5540a1638baf 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -5,6 +5,12 @@ lib-y		:= clear_user.o delay.o copy_from_user.o		\
 		   memcmp.o strcmp.o strncmp.o strlen.o strnlen.o	\
 		   strchr.o strrchr.o tishift.o
 
+ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
+obj-$(CONFIG_XOR_BLOCKS)	+= xor-neon.o
+CFLAGS_REMOVE_xor-neon.o	+= -mgeneral-regs-only
+CFLAGS_xor-neon.o		+= -ffreestanding
+endif
+
 # Tell the compiler to treat all general purpose registers (with the
 # exception of the IP registers, which are already handled by the caller
 # in case of a PLT) as callee-saved, which allows for efficient runtime
diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c
new file mode 100644
index 000000000000..131c60c27dff
--- /dev/null
+++ b/arch/arm64/lib/xor-neon.c
@@ -0,0 +1,184 @@
+/*
+ * arch/arm64/lib/xor-neon.c
+ *
+ * Authors: Jackie Liu <liuyun01@kylinos.cn>
+ * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/raid/xor.h>
+#include <linux/module.h>
+#include <asm/neon-intrinsics.h>
+
+void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1,
+	unsigned long *p2)
+{
+	uint64_t *dp1 = (uint64_t *)p1;
+	uint64_t *dp2 = (uint64_t *)p2;
+
+	register uint64x2_t v0, v1, v2, v3;
+	long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+	do {
+		/* p1 ^= p2 */
+		v0 = veorq_u64(vld1q_u64(dp1 +  0), vld1q_u64(dp2 +  0));
+		v1 = veorq_u64(vld1q_u64(dp1 +  2), vld1q_u64(dp2 +  2));
+		v2 = veorq_u64(vld1q_u64(dp1 +  4), vld1q_u64(dp2 +  4));
+		v3 = veorq_u64(vld1q_u64(dp1 +  6), vld1q_u64(dp2 +  6));
+
+		/* store */
+		vst1q_u64(dp1 +  0, v0);
+		vst1q_u64(dp1 +  2, v1);
+		vst1q_u64(dp1 +  4, v2);
+		vst1q_u64(dp1 +  6, v3);
+
+		dp1 += 8;
+		dp2 += 8;
+	} while (--lines > 0);
+}
+
+void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1,
+	unsigned long *p2, unsigned long *p3)
+{
+	uint64_t *dp1 = (uint64_t *)p1;
+	uint64_t *dp2 = (uint64_t *)p2;
+	uint64_t *dp3 = (uint64_t *)p3;
+
+	register uint64x2_t v0, v1, v2, v3;
+	long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+	do {
+		/* p1 ^= p2 */
+		v0 = veorq_u64(vld1q_u64(dp1 +  0), vld1q_u64(dp2 +  0));
+		v1 = veorq_u64(vld1q_u64(dp1 +  2), vld1q_u64(dp2 +  2));
+		v2 = veorq_u64(vld1q_u64(dp1 +  4), vld1q_u64(dp2 +  4));
+		v3 = veorq_u64(vld1q_u64(dp1 +  6), vld1q_u64(dp2 +  6));
+
+		/* p1 ^= p3 */
+		v0 = veorq_u64(v0, vld1q_u64(dp3 +  0));
+		v1 = veorq_u64(v1, vld1q_u64(dp3 +  2));
+		v2 = veorq_u64(v2, vld1q_u64(dp3 +  4));
+		v3 = veorq_u64(v3, vld1q_u64(dp3 +  6));
+
+		/* store */
+		vst1q_u64(dp1 +  0, v0);
+		vst1q_u64(dp1 +  2, v1);
+		vst1q_u64(dp1 +  4, v2);
+		vst1q_u64(dp1 +  6, v3);
+
+		dp1 += 8;
+		dp2 += 8;
+		dp3 += 8;
+	} while (--lines > 0);
+}
+
+void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1,
+	unsigned long *p2, unsigned long *p3, unsigned long *p4)
+{
+	uint64_t *dp1 = (uint64_t *)p1;
+	uint64_t *dp2 = (uint64_t *)p2;
+	uint64_t *dp3 = (uint64_t *)p3;
+	uint64_t *dp4 = (uint64_t *)p4;
+
+	register uint64x2_t v0, v1, v2, v3;
+	long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+	do {
+		/* p1 ^= p2 */
+		v0 = veorq_u64(vld1q_u64(dp1 +  0), vld1q_u64(dp2 +  0));
+		v1 = veorq_u64(vld1q_u64(dp1 +  2), vld1q_u64(dp2 +  2));
+		v2 = veorq_u64(vld1q_u64(dp1 +  4), vld1q_u64(dp2 +  4));
+		v3 = veorq_u64(vld1q_u64(dp1 +  6), vld1q_u64(dp2 +  6));
+
+		/* p1 ^= p3 */
+		v0 = veorq_u64(v0, vld1q_u64(dp3 +  0));
+		v1 = veorq_u64(v1, vld1q_u64(dp3 +  2));
+		v2 = veorq_u64(v2, vld1q_u64(dp3 +  4));
+		v3 = veorq_u64(v3, vld1q_u64(dp3 +  6));
+
+		/* p1 ^= p4 */
+		v0 = veorq_u64(v0, vld1q_u64(dp4 +  0));
+		v1 = veorq_u64(v1, vld1q_u64(dp4 +  2));
+		v2 = veorq_u64(v2, vld1q_u64(dp4 +  4));
+		v3 = veorq_u64(v3, vld1q_u64(dp4 +  6));
+
+		/* store */
+		vst1q_u64(dp1 +  0, v0);
+		vst1q_u64(dp1 +  2, v1);
+		vst1q_u64(dp1 +  4, v2);
+		vst1q_u64(dp1 +  6, v3);
+
+		dp1 += 8;
+		dp2 += 8;
+		dp3 += 8;
+		dp4 += 8;
+	} while (--lines > 0);
+}
+
+void xor_arm64_neon_5(unsigned long bytes, unsigned long *p1,
+	unsigned long *p2, unsigned long *p3,
+	unsigned long *p4, unsigned long *p5)
+{
+	uint64_t *dp1 = (uint64_t *)p1;
+	uint64_t *dp2 = (uint64_t *)p2;
+	uint64_t *dp3 = (uint64_t *)p3;
+	uint64_t *dp4 = (uint64_t *)p4;
+	uint64_t *dp5 = (uint64_t *)p5;
+
+	register uint64x2_t v0, v1, v2, v3;
+	long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+	do {
+		/* p1 ^= p2 */
+		v0 = veorq_u64(vld1q_u64(dp1 +  0), vld1q_u64(dp2 +  0));
+		v1 = veorq_u64(vld1q_u64(dp1 +  2), vld1q_u64(dp2 +  2));
+		v2 = veorq_u64(vld1q_u64(dp1 +  4), vld1q_u64(dp2 +  4));
+		v3 = veorq_u64(vld1q_u64(dp1 +  6), vld1q_u64(dp2 +  6));
+
+		/* p1 ^= p3 */
+		v0 = veorq_u64(v0, vld1q_u64(dp3 +  0));
+		v1 = veorq_u64(v1, vld1q_u64(dp3 +  2));
+		v2 = veorq_u64(v2, vld1q_u64(dp3 +  4));
+		v3 = veorq_u64(v3, vld1q_u64(dp3 +  6));
+
+		/* p1 ^= p4 */
+		v0 = veorq_u64(v0, vld1q_u64(dp4 +  0));
+		v1 = veorq_u64(v1, vld1q_u64(dp4 +  2));
+		v2 = veorq_u64(v2, vld1q_u64(dp4 +  4));
+		v3 = veorq_u64(v3, vld1q_u64(dp4 +  6));
+
+		/* p1 ^= p5 */
+		v0 = veorq_u64(v0, vld1q_u64(dp5 +  0));
+		v1 = veorq_u64(v1, vld1q_u64(dp5 +  2));
+		v2 = veorq_u64(v2, vld1q_u64(dp5 +  4));
+		v3 = veorq_u64(v3, vld1q_u64(dp5 +  6));
+
+		/* store */
+		vst1q_u64(dp1 +  0, v0);
+		vst1q_u64(dp1 +  2, v1);
+		vst1q_u64(dp1 +  4, v2);
+		vst1q_u64(dp1 +  6, v3);
+
+		dp1 += 8;
+		dp2 += 8;
+		dp3 += 8;
+		dp4 += 8;
+		dp5 += 8;
+	} while (--lines > 0);
+}
+
+struct xor_block_template const xor_block_inner_neon = {
+	.name	= "__inner_neon__",
+	.do_2	= xor_arm64_neon_2,
+	.do_3	= xor_arm64_neon_3,
+	.do_4	= xor_arm64_neon_4,
+	.do_5	= xor_arm64_neon_5,
+};
+EXPORT_SYMBOL(xor_block_inner_neon);
+
+MODULE_AUTHOR("Jackie Liu <liuyun01@kylinos.cn>");
+MODULE_DESCRIPTION("ARMv8 XOR Extensions");
+MODULE_LICENSE("GPL");
-- 
cgit 


From 396244692232fcf0881cb6ba2404be2906f47681 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 20 Sep 2018 10:26:40 +0100
Subject: arm64: preempt: Provide our own implementation of asm/preempt.h

The asm-generic/preempt.h implementation doesn't make use of the
PREEMPT_NEED_RESCHED flag, since this can interact badly with load/store
architectures which rely on the preempt_count word being unchanged across
an interrupt.

However, since we're a 64-bit architecture and the preempt count is
only 32 bits wide, we can simply pack it next to the resched flag and
load the whole thing in one go, so that a dec-and-test operation doesn't
need to load twice.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/Kbuild        |  1 -
 arch/arm64/include/asm/preempt.h     | 89 ++++++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/thread_info.h | 13 +++++-
 3 files changed, 101 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/include/asm/preempt.h

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 1877f29f6d97..1e17ea5c372b 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -14,7 +14,6 @@ generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += msi.h
-generic-y += preempt.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
 generic-y += rwsem.h
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
new file mode 100644
index 000000000000..d49951647014
--- /dev/null
+++ b/arch/arm64/include/asm/preempt.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <linux/thread_info.h>
+
+#define PREEMPT_NEED_RESCHED	BIT(32)
+#define PREEMPT_ENABLED	(PREEMPT_NEED_RESCHED)
+
+static inline int preempt_count(void)
+{
+	return READ_ONCE(current_thread_info()->preempt.count);
+}
+
+static inline void preempt_count_set(u64 pc)
+{
+	/* Preserve existing value of PREEMPT_NEED_RESCHED */
+	WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+#define init_task_preempt_count(p) do { \
+	task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+	task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
+} while (0)
+
+static inline void set_preempt_need_resched(void)
+{
+	current_thread_info()->preempt.need_resched = 0;
+}
+
+static inline void clear_preempt_need_resched(void)
+{
+	current_thread_info()->preempt.need_resched = 1;
+}
+
+static inline bool test_preempt_need_resched(void)
+{
+	return !current_thread_info()->preempt.need_resched;
+}
+
+static inline void __preempt_count_add(int val)
+{
+	u32 pc = READ_ONCE(current_thread_info()->preempt.count);
+	pc += val;
+	WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+static inline void __preempt_count_sub(int val)
+{
+	u32 pc = READ_ONCE(current_thread_info()->preempt.count);
+	pc -= val;
+	WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+static inline bool __preempt_count_dec_and_test(void)
+{
+	struct thread_info *ti = current_thread_info();
+	u64 pc = READ_ONCE(ti->preempt_count);
+
+	/* Update only the count field, leaving need_resched unchanged */
+	WRITE_ONCE(ti->preempt.count, --pc);
+
+	/*
+	 * If we wrote back all zeroes, then we're preemptible and in
+	 * need of a reschedule. Otherwise, we need to reload the
+	 * preempt_count in case the need_resched flag was cleared by an
+	 * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
+	 * pair.
+	 */
+	return !pc || !READ_ONCE(ti->preempt_count);
+}
+
+static inline bool should_resched(int preempt_offset)
+{
+	u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+	return pc == preempt_offset;
+}
+
+#ifdef CONFIG_PREEMPT
+void preempt_schedule(void);
+#define __preempt_schedule() preempt_schedule()
+void preempt_schedule_notrace(void);
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
+#endif /* CONFIG_PREEMPT */
+
+#endif /* __ASM_PREEMPT_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index cb2c10a8f0a8..bbca68b54732 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -42,7 +42,18 @@ struct thread_info {
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
 	u64			ttbr0;		/* saved TTBR0_EL1 */
 #endif
-	int			preempt_count;	/* 0 => preemptable, <0 => bug */
+	union {
+		u64		preempt_count;	/* 0 => preemptible, <0 => bug */
+		struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+			u32	need_resched;
+			u32	count;
+#else
+			u32	count;
+			u32	need_resched;
+#endif
+		} preempt;
+	};
 };
 
 #define thread_saved_pc(tsk)	\
-- 
cgit 


From 5ef3fe4cecdf82fdd71ce78988403963d01444d4 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 13 Sep 2018 13:30:45 +0100
Subject: arm64: Avoid redundant type conversions in xchg() and cmpxchg()

Our atomic instructions (either LSE atomics of LDXR/STXR sequences)
natively support byte, half-word, word and double-word memory accesses
so there is no need to mask the data register prior to being stored.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/atomic_ll_sc.h |  53 ++++++++--------
 arch/arm64/include/asm/atomic_lse.h   |  46 +++++++-------
 arch/arm64/include/asm/cmpxchg.h      | 116 +++++++++++++++++-----------------
 3 files changed, 108 insertions(+), 107 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index f5a2d09afb38..f02d3bf7b9e6 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -248,48 +248,49 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
 }
 __LL_SC_EXPORT(atomic64_dec_if_positive);
 
-#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl)			\
-__LL_SC_INLINE unsigned long						\
-__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,		\
-				     unsigned long old,			\
-				     unsigned long new))		\
+#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl)		\
+__LL_SC_INLINE u##sz							\
+__LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr,		\
+					 unsigned long old,		\
+					 u##sz new))			\
 {									\
-	unsigned long tmp, oldval;					\
+	unsigned long tmp;						\
+	u##sz oldval;							\
 									\
 	asm volatile(							\
 	"	prfm	pstl1strm, %[v]\n"				\
-	"1:	ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n"		\
+	"1:	ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n"		\
 	"	eor	%" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"	\
 	"	cbnz	%" #w "[tmp], 2f\n"				\
-	"	st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n"	\
+	"	st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n"	\
 	"	cbnz	%w[tmp], 1b\n"					\
 	"	" #mb "\n"						\
 	"2:"								\
 	: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),			\
-	  [v] "+Q" (*(unsigned long *)ptr)				\
+	  [v] "+Q" (*(u##sz *)ptr)					\
 	: [old] "Lr" (old), [new] "r" (new)				\
 	: cl);								\
 									\
 	return oldval;							\
 }									\
-__LL_SC_EXPORT(__cmpxchg_case_##name);
+__LL_SC_EXPORT(__cmpxchg_case_##name##sz);
 
-__CMPXCHG_CASE(w, b,     1,        ,  ,  ,         )
-__CMPXCHG_CASE(w, h,     2,        ,  ,  ,         )
-__CMPXCHG_CASE(w,  ,     4,        ,  ,  ,         )
-__CMPXCHG_CASE( ,  ,     8,        ,  ,  ,         )
-__CMPXCHG_CASE(w, b, acq_1,        , a,  , "memory")
-__CMPXCHG_CASE(w, h, acq_2,        , a,  , "memory")
-__CMPXCHG_CASE(w,  , acq_4,        , a,  , "memory")
-__CMPXCHG_CASE( ,  , acq_8,        , a,  , "memory")
-__CMPXCHG_CASE(w, b, rel_1,        ,  , l, "memory")
-__CMPXCHG_CASE(w, h, rel_2,        ,  , l, "memory")
-__CMPXCHG_CASE(w,  , rel_4,        ,  , l, "memory")
-__CMPXCHG_CASE( ,  , rel_8,        ,  , l, "memory")
-__CMPXCHG_CASE(w, b,  mb_1, dmb ish,  , l, "memory")
-__CMPXCHG_CASE(w, h,  mb_2, dmb ish,  , l, "memory")
-__CMPXCHG_CASE(w,  ,  mb_4, dmb ish,  , l, "memory")
-__CMPXCHG_CASE( ,  ,  mb_8, dmb ish,  , l, "memory")
+__CMPXCHG_CASE(w, b,     ,  8,        ,  ,  ,         )
+__CMPXCHG_CASE(w, h,     , 16,        ,  ,  ,         )
+__CMPXCHG_CASE(w,  ,     , 32,        ,  ,  ,         )
+__CMPXCHG_CASE( ,  ,     , 64,        ,  ,  ,         )
+__CMPXCHG_CASE(w, b, acq_,  8,        , a,  , "memory")
+__CMPXCHG_CASE(w, h, acq_, 16,        , a,  , "memory")
+__CMPXCHG_CASE(w,  , acq_, 32,        , a,  , "memory")
+__CMPXCHG_CASE( ,  , acq_, 64,        , a,  , "memory")
+__CMPXCHG_CASE(w, b, rel_,  8,        ,  , l, "memory")
+__CMPXCHG_CASE(w, h, rel_, 16,        ,  , l, "memory")
+__CMPXCHG_CASE(w,  , rel_, 32,        ,  , l, "memory")
+__CMPXCHG_CASE( ,  , rel_, 64,        ,  , l, "memory")
+__CMPXCHG_CASE(w, b,  mb_,  8, dmb ish,  , l, "memory")
+__CMPXCHG_CASE(w, h,  mb_, 16, dmb ish,  , l, "memory")
+__CMPXCHG_CASE(w,  ,  mb_, 32, dmb ish,  , l, "memory")
+__CMPXCHG_CASE( ,  ,  mb_, 64, dmb ish,  , l, "memory")
 
 #undef __CMPXCHG_CASE
 
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index f9b0b09153e0..4d6f917b654e 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -446,22 +446,22 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 
 #define __LL_SC_CMPXCHG(op)	__LL_SC_CALL(__cmpxchg_case_##op)
 
-#define __CMPXCHG_CASE(w, sz, name, mb, cl...)				\
-static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,	\
-						  unsigned long old,	\
-						  unsigned long new)	\
+#define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...)			\
+static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr,	\
+					      unsigned long old,	\
+					      u##sz new)		\
 {									\
 	register unsigned long x0 asm ("x0") = (unsigned long)ptr;	\
 	register unsigned long x1 asm ("x1") = old;			\
-	register unsigned long x2 asm ("x2") = new;			\
+	register u##sz x2 asm ("x2") = new;				\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	__LL_SC_CMPXCHG(name)						\
+	__LL_SC_CMPXCHG(name##sz)					\
 	__nops(2),							\
 	/* LSE atomics */						\
 	"	mov	" #w "30, %" #w "[old]\n"			\
-	"	cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n"		\
+	"	cas" #mb #sfx "\t" #w "30, %" #w "[new], %[v]\n"	\
 	"	mov	%" #w "[ret], " #w "30")			\
 	: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr)		\
 	: [old] "r" (x1), [new] "r" (x2)				\
@@ -470,22 +470,22 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,	\
 	return x0;							\
 }
 
-__CMPXCHG_CASE(w, b,     1,   )
-__CMPXCHG_CASE(w, h,     2,   )
-__CMPXCHG_CASE(w,  ,     4,   )
-__CMPXCHG_CASE(x,  ,     8,   )
-__CMPXCHG_CASE(w, b, acq_1,  a, "memory")
-__CMPXCHG_CASE(w, h, acq_2,  a, "memory")
-__CMPXCHG_CASE(w,  , acq_4,  a, "memory")
-__CMPXCHG_CASE(x,  , acq_8,  a, "memory")
-__CMPXCHG_CASE(w, b, rel_1,  l, "memory")
-__CMPXCHG_CASE(w, h, rel_2,  l, "memory")
-__CMPXCHG_CASE(w,  , rel_4,  l, "memory")
-__CMPXCHG_CASE(x,  , rel_8,  l, "memory")
-__CMPXCHG_CASE(w, b,  mb_1, al, "memory")
-__CMPXCHG_CASE(w, h,  mb_2, al, "memory")
-__CMPXCHG_CASE(w,  ,  mb_4, al, "memory")
-__CMPXCHG_CASE(x,  ,  mb_8, al, "memory")
+__CMPXCHG_CASE(w, b,     ,  8,   )
+__CMPXCHG_CASE(w, h,     , 16,   )
+__CMPXCHG_CASE(w,  ,     , 32,   )
+__CMPXCHG_CASE(x,  ,     , 64,   )
+__CMPXCHG_CASE(w, b, acq_,  8,  a, "memory")
+__CMPXCHG_CASE(w, h, acq_, 16,  a, "memory")
+__CMPXCHG_CASE(w,  , acq_, 32,  a, "memory")
+__CMPXCHG_CASE(x,  , acq_, 64,  a, "memory")
+__CMPXCHG_CASE(w, b, rel_,  8,  l, "memory")
+__CMPXCHG_CASE(w, h, rel_, 16,  l, "memory")
+__CMPXCHG_CASE(w,  , rel_, 32,  l, "memory")
+__CMPXCHG_CASE(x,  , rel_, 64,  l, "memory")
+__CMPXCHG_CASE(w, b,  mb_,  8, al, "memory")
+__CMPXCHG_CASE(w, h,  mb_, 16, al, "memory")
+__CMPXCHG_CASE(w,  ,  mb_, 32, al, "memory")
+__CMPXCHG_CASE(x,  ,  mb_, 64, al, "memory")
 
 #undef __LL_SC_CMPXCHG
 #undef __CMPXCHG_CASE
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 3b0938281541..1f0340fc6dad 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -30,46 +30,46 @@
  * barrier case is generated as release+dmb for the former and
  * acquire+release for the latter.
  */
-#define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl)	\
-static inline unsigned long __xchg_case_##name(unsigned long x,		\
-					       volatile void *ptr)	\
-{									\
-	unsigned long ret, tmp;						\
-									\
-	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
-	/* LL/SC */							\
-	"	prfm	pstl1strm, %2\n"				\
-	"1:	ld" #acq "xr" #sz "\t%" #w "0, %2\n"			\
-	"	st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n"		\
-	"	cbnz	%w1, 1b\n"					\
-	"	" #mb,							\
-	/* LSE atomics */						\
-	"	swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n"	\
-		__nops(3)						\
-	"	" #nop_lse)						\
-	: "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned long *)ptr)	\
-	: "r" (x)							\
-	: cl);								\
-									\
-	return ret;							\
+#define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl)	\
+static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)		\
+{										\
+	u##sz ret;								\
+	unsigned long tmp;							\
+										\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(					\
+	/* LL/SC */								\
+	"	prfm	pstl1strm, %2\n"					\
+	"1:	ld" #acq "xr" #sfx "\t%" #w "0, %2\n"				\
+	"	st" #rel "xr" #sfx "\t%w1, %" #w "3, %2\n"			\
+	"	cbnz	%w1, 1b\n"						\
+	"	" #mb,								\
+	/* LSE atomics */							\
+	"	swp" #acq_lse #rel #sfx "\t%" #w "3, %" #w "0, %2\n"		\
+		__nops(3)							\
+	"	" #nop_lse)							\
+	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u##sz *)ptr)			\
+	: "r" (x)								\
+	: cl);									\
+										\
+	return ret;								\
 }
 
-__XCHG_CASE(w, b,     1,        ,    ,  ,  ,  ,         )
-__XCHG_CASE(w, h,     2,        ,    ,  ,  ,  ,         )
-__XCHG_CASE(w,  ,     4,        ,    ,  ,  ,  ,         )
-__XCHG_CASE( ,  ,     8,        ,    ,  ,  ,  ,         )
-__XCHG_CASE(w, b, acq_1,        ,    , a, a,  , "memory")
-__XCHG_CASE(w, h, acq_2,        ,    , a, a,  , "memory")
-__XCHG_CASE(w,  , acq_4,        ,    , a, a,  , "memory")
-__XCHG_CASE( ,  , acq_8,        ,    , a, a,  , "memory")
-__XCHG_CASE(w, b, rel_1,        ,    ,  ,  , l, "memory")
-__XCHG_CASE(w, h, rel_2,        ,    ,  ,  , l, "memory")
-__XCHG_CASE(w,  , rel_4,        ,    ,  ,  , l, "memory")
-__XCHG_CASE( ,  , rel_8,        ,    ,  ,  , l, "memory")
-__XCHG_CASE(w, b,  mb_1, dmb ish, nop,  , a, l, "memory")
-__XCHG_CASE(w, h,  mb_2, dmb ish, nop,  , a, l, "memory")
-__XCHG_CASE(w,  ,  mb_4, dmb ish, nop,  , a, l, "memory")
-__XCHG_CASE( ,  ,  mb_8, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w, b,     ,  8,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, h,     , 16,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w,  ,     , 32,        ,    ,  ,  ,  ,         )
+__XCHG_CASE( ,  ,     , 64,        ,    ,  ,  ,  ,         )
+__XCHG_CASE(w, b, acq_,  8,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, h, acq_, 16,        ,    , a, a,  , "memory")
+__XCHG_CASE(w,  , acq_, 32,        ,    , a, a,  , "memory")
+__XCHG_CASE( ,  , acq_, 64,        ,    , a, a,  , "memory")
+__XCHG_CASE(w, b, rel_,  8,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, h, rel_, 16,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w,  , rel_, 32,        ,    ,  ,  , l, "memory")
+__XCHG_CASE( ,  , rel_, 64,        ,    ,  ,  , l, "memory")
+__XCHG_CASE(w, b,  mb_,  8, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w, h,  mb_, 16, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE(w,  ,  mb_, 32, dmb ish, nop,  , a, l, "memory")
+__XCHG_CASE( ,  ,  mb_, 64, dmb ish, nop,  , a, l, "memory")
 
 #undef __XCHG_CASE
 
@@ -80,13 +80,13 @@ static inline unsigned long __xchg##sfx(unsigned long x,		\
 {									\
 	switch (size) {							\
 	case 1:								\
-		return __xchg_case##sfx##_1(x, ptr);			\
+		return __xchg_case##sfx##_8(x, ptr);			\
 	case 2:								\
-		return __xchg_case##sfx##_2(x, ptr);			\
+		return __xchg_case##sfx##_16(x, ptr);			\
 	case 4:								\
-		return __xchg_case##sfx##_4(x, ptr);			\
+		return __xchg_case##sfx##_32(x, ptr);			\
 	case 8:								\
-		return __xchg_case##sfx##_8(x, ptr);			\
+		return __xchg_case##sfx##_64(x, ptr);			\
 	default:							\
 		BUILD_BUG();						\
 	}								\
@@ -123,13 +123,13 @@ static inline unsigned long __cmpxchg##sfx(volatile void *ptr,		\
 {									\
 	switch (size) {							\
 	case 1:								\
-		return __cmpxchg_case##sfx##_1(ptr, (u8)old, new);	\
+		return __cmpxchg_case##sfx##_8(ptr, (u8)old, new);	\
 	case 2:								\
-		return __cmpxchg_case##sfx##_2(ptr, (u16)old, new);	\
+		return __cmpxchg_case##sfx##_16(ptr, (u16)old, new);	\
 	case 4:								\
-		return __cmpxchg_case##sfx##_4(ptr, old, new);		\
+		return __cmpxchg_case##sfx##_32(ptr, old, new);		\
 	case 8:								\
-		return __cmpxchg_case##sfx##_8(ptr, old, new);		\
+		return __cmpxchg_case##sfx##_64(ptr, old, new);		\
 	default:							\
 		BUILD_BUG();						\
 	}								\
@@ -197,16 +197,16 @@ __CMPXCHG_GEN(_mb)
 	__ret; \
 })
 
-#define __CMPWAIT_CASE(w, sz, name)					\
-static inline void __cmpwait_case_##name(volatile void *ptr,		\
-					 unsigned long val)		\
+#define __CMPWAIT_CASE(w, sfx, sz)					\
+static inline void __cmpwait_case_##sz(volatile void *ptr,		\
+				       unsigned long val)		\
 {									\
 	unsigned long tmp;						\
 									\
 	asm volatile(							\
 	"	sevl\n"							\
 	"	wfe\n"							\
-	"	ldxr" #sz "\t%" #w "[tmp], %[v]\n"			\
+	"	ldxr" #sfx "\t%" #w "[tmp], %[v]\n"			\
 	"	eor	%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"	\
 	"	cbnz	%" #w "[tmp], 1f\n"				\
 	"	wfe\n"							\
@@ -215,10 +215,10 @@ static inline void __cmpwait_case_##name(volatile void *ptr,		\
 	: [val] "r" (val));						\
 }
 
-__CMPWAIT_CASE(w, b, 1);
-__CMPWAIT_CASE(w, h, 2);
-__CMPWAIT_CASE(w,  , 4);
-__CMPWAIT_CASE( ,  , 8);
+__CMPWAIT_CASE(w, b, 8);
+__CMPWAIT_CASE(w, h, 16);
+__CMPWAIT_CASE(w,  , 32);
+__CMPWAIT_CASE( ,  , 64);
 
 #undef __CMPWAIT_CASE
 
@@ -229,13 +229,13 @@ static inline void __cmpwait##sfx(volatile void *ptr,			\
 {									\
 	switch (size) {							\
 	case 1:								\
-		return __cmpwait_case##sfx##_1(ptr, (u8)val);		\
+		return __cmpwait_case##sfx##_8(ptr, (u8)val);		\
 	case 2:								\
-		return __cmpwait_case##sfx##_2(ptr, (u16)val);		\
+		return __cmpwait_case##sfx##_16(ptr, (u16)val);		\
 	case 4:								\
-		return __cmpwait_case##sfx##_4(ptr, val);		\
+		return __cmpwait_case##sfx##_32(ptr, val);		\
 	case 8:								\
-		return __cmpwait_case##sfx##_8(ptr, val);		\
+		return __cmpwait_case##sfx##_64(ptr, val);		\
 	default:							\
 		BUILD_BUG();						\
 	}								\
-- 
cgit 


From b4f9209bfcd5964551de434342818334ab9c8c7e Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 13 Sep 2018 14:28:33 +0100
Subject: arm64: Avoid masking "old" for LSE cmpxchg() implementation

The CAS instructions implicitly access only the relevant bits of the "old"
argument, so there is no need for explicit masking via type-casting as
there is in the LL/SC implementation.

Move the casting into the LL/SC code and remove it altogether for the LSE
implementation.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/atomic_ll_sc.h | 8 ++++++++
 arch/arm64/include/asm/atomic_lse.h   | 4 ++--
 arch/arm64/include/asm/cmpxchg.h      | 4 ++--
 3 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index f02d3bf7b9e6..b53f70dd6e10 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -257,6 +257,14 @@ __LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr,		\
 	unsigned long tmp;						\
 	u##sz oldval;							\
 									\
+	/*								\
+	 * Sub-word sizes require explicit casting so that the compare  \
+	 * part of the cmpxchg doesn't end up interpreting non-zero	\
+	 * upper bits of the register containing "old".			\
+	 */								\
+	if (sz < 32)							\
+		old = (u##sz)old;					\
+									\
 	asm volatile(							\
 	"	prfm	pstl1strm, %[v]\n"				\
 	"1:	ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n"		\
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 4d6f917b654e..a424355240c5 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -448,11 +448,11 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 
 #define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...)			\
 static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr,	\
-					      unsigned long old,	\
+					      u##sz old,		\
 					      u##sz new)		\
 {									\
 	register unsigned long x0 asm ("x0") = (unsigned long)ptr;	\
-	register unsigned long x1 asm ("x1") = old;			\
+	register u##sz x1 asm ("x1") = old;				\
 	register u##sz x2 asm ("x2") = new;				\
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 1f0340fc6dad..3f9376f1c409 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -123,9 +123,9 @@ static inline unsigned long __cmpxchg##sfx(volatile void *ptr,		\
 {									\
 	switch (size) {							\
 	case 1:								\
-		return __cmpxchg_case##sfx##_8(ptr, (u8)old, new);	\
+		return __cmpxchg_case##sfx##_8(ptr, old, new);		\
 	case 2:								\
-		return __cmpxchg_case##sfx##_16(ptr, (u16)old, new);	\
+		return __cmpxchg_case##sfx##_16(ptr, old, new);		\
 	case 4:								\
 		return __cmpxchg_case##sfx##_32(ptr, old, new);		\
 	case 8:								\
-- 
cgit 


From 959bf2fd03b59fc107584c21425f3dc73c49f762 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 13 Sep 2018 15:56:16 +0100
Subject: arm64: percpu: Rewrite per-cpu ops to allow use of LSE atomics

Our percpu code is a bit of an inconsistent mess:

  * It rolls its own xchg(), but reuses cmpxchg_local()
  * It uses various different flavours of preempt_{enable,disable}()
  * It returns values even for the non-returning RmW operations
  * It makes no use of LSE atomics outside of the cmpxchg() ops
  * There are individual macros for different sizes of access, but these
    are all funneled through a switch statement rather than dispatched
    directly to the relevant case

This patch rewrites the per-cpu operations to address these shortcomings.
Whilst the new code is a lot cleaner, the big advantage is that we can
use the non-returning ST- atomic instructions when we have LSE.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/percpu.h | 390 +++++++++++++++++-----------------------
 1 file changed, 160 insertions(+), 230 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 21a81b59a0cc..f7b1bbbb6f12 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -48,263 +48,193 @@ static inline unsigned long __my_cpu_offset(void)
 }
 #define __my_cpu_offset __my_cpu_offset()
 
-#define PERCPU_OP(op, asm_op)						\
-static inline unsigned long __percpu_##op(void *ptr,			\
-			unsigned long val, int size)			\
+#define PERCPU_RW_OPS(sz)						\
+static inline unsigned long __percpu_read_##sz(void *ptr)		\
 {									\
-	unsigned long loop, ret;					\
+	return READ_ONCE(*(u##sz *)ptr);				\
+}									\
 									\
-	switch (size) {							\
-	case 1:								\
-		asm ("//__per_cpu_" #op "_1\n"				\
-		"1:	ldxrb	  %w[ret], %[ptr]\n"			\
-			#asm_op " %w[ret], %w[ret], %w[val]\n"		\
-		"	stxrb	  %w[loop], %w[ret], %[ptr]\n"		\
-		"	cbnz	  %w[loop], 1b"				\
-		: [loop] "=&r" (loop), [ret] "=&r" (ret),		\
-		  [ptr] "+Q"(*(u8 *)ptr)				\
-		: [val] "Ir" (val));					\
-		break;							\
-	case 2:								\
-		asm ("//__per_cpu_" #op "_2\n"				\
-		"1:	ldxrh	  %w[ret], %[ptr]\n"			\
-			#asm_op " %w[ret], %w[ret], %w[val]\n"		\
-		"	stxrh	  %w[loop], %w[ret], %[ptr]\n"		\
-		"	cbnz	  %w[loop], 1b"				\
-		: [loop] "=&r" (loop), [ret] "=&r" (ret),		\
-		  [ptr]  "+Q"(*(u16 *)ptr)				\
-		: [val] "Ir" (val));					\
-		break;							\
-	case 4:								\
-		asm ("//__per_cpu_" #op "_4\n"				\
-		"1:	ldxr	  %w[ret], %[ptr]\n"			\
-			#asm_op " %w[ret], %w[ret], %w[val]\n"		\
-		"	stxr	  %w[loop], %w[ret], %[ptr]\n"		\
-		"	cbnz	  %w[loop], 1b"				\
-		: [loop] "=&r" (loop), [ret] "=&r" (ret),		\
-		  [ptr] "+Q"(*(u32 *)ptr)				\
-		: [val] "Ir" (val));					\
-		break;							\
-	case 8:								\
-		asm ("//__per_cpu_" #op "_8\n"				\
-		"1:	ldxr	  %[ret], %[ptr]\n"			\
-			#asm_op " %[ret], %[ret], %[val]\n"		\
-		"	stxr	  %w[loop], %[ret], %[ptr]\n"		\
-		"	cbnz	  %w[loop], 1b"				\
-		: [loop] "=&r" (loop), [ret] "=&r" (ret),		\
-		  [ptr] "+Q"(*(u64 *)ptr)				\
-		: [val] "Ir" (val));					\
-		break;							\
-	default:							\
-		ret = 0;						\
-		BUILD_BUG();						\
-	}								\
-									\
-	return ret;							\
-}
-
-PERCPU_OP(add, add)
-PERCPU_OP(and, and)
-PERCPU_OP(or, orr)
-#undef PERCPU_OP
-
-static inline unsigned long __percpu_read(void *ptr, int size)
-{
-	unsigned long ret;
-
-	switch (size) {
-	case 1:
-		ret = READ_ONCE(*(u8 *)ptr);
-		break;
-	case 2:
-		ret = READ_ONCE(*(u16 *)ptr);
-		break;
-	case 4:
-		ret = READ_ONCE(*(u32 *)ptr);
-		break;
-	case 8:
-		ret = READ_ONCE(*(u64 *)ptr);
-		break;
-	default:
-		ret = 0;
-		BUILD_BUG();
-	}
-
-	return ret;
+static inline void __percpu_write_##sz(void *ptr, unsigned long val)	\
+{									\
+	WRITE_ONCE(*(u##sz *)ptr, (u##sz)val);				\
 }
 
-static inline void __percpu_write(void *ptr, unsigned long val, int size)
-{
-	switch (size) {
-	case 1:
-		WRITE_ONCE(*(u8 *)ptr, (u8)val);
-		break;
-	case 2:
-		WRITE_ONCE(*(u16 *)ptr, (u16)val);
-		break;
-	case 4:
-		WRITE_ONCE(*(u32 *)ptr, (u32)val);
-		break;
-	case 8:
-		WRITE_ONCE(*(u64 *)ptr, (u64)val);
-		break;
-	default:
-		BUILD_BUG();
-	}
+#define __PERCPU_OP_CASE(w, sfx, name, sz, op_llsc, op_lse)		\
+static inline void							\
+__percpu_##name##_case_##sz(void *ptr, unsigned long val)		\
+{									\
+	unsigned int loop;						\
+	u##sz tmp;							\
+									\
+	asm volatile (ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"1:	ldxr" #sfx "\t%" #w "[tmp], %[ptr]\n"			\
+		#op_llsc "\t%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"	\
+	"	stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n"		\
+	"	cbnz	%w[loop], 1b",					\
+	/* LSE atomics */						\
+		#op_lse "\t%" #w "[val], %[ptr]\n"			\
+		__nops(3))						\
+	: [loop] "=&r" (loop), [tmp] "=&r" (tmp),			\
+	  [ptr] "+Q"(*(u##sz *)ptr)					\
+	: [val] "r" ((u##sz)(val)));					\
 }
 
-static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
-						int size)
-{
-	unsigned long ret, loop;
-
-	switch (size) {
-	case 1:
-		asm ("//__percpu_xchg_1\n"
-		"1:	ldxrb	%w[ret], %[ptr]\n"
-		"	stxrb	%w[loop], %w[val], %[ptr]\n"
-		"	cbnz	%w[loop], 1b"
-		: [loop] "=&r"(loop), [ret] "=&r"(ret),
-		  [ptr] "+Q"(*(u8 *)ptr)
-		: [val] "r" (val));
-		break;
-	case 2:
-		asm ("//__percpu_xchg_2\n"
-		"1:	ldxrh	%w[ret], %[ptr]\n"
-		"	stxrh	%w[loop], %w[val], %[ptr]\n"
-		"	cbnz	%w[loop], 1b"
-		: [loop] "=&r"(loop), [ret] "=&r"(ret),
-		  [ptr] "+Q"(*(u16 *)ptr)
-		: [val] "r" (val));
-		break;
-	case 4:
-		asm ("//__percpu_xchg_4\n"
-		"1:	ldxr	%w[ret], %[ptr]\n"
-		"	stxr	%w[loop], %w[val], %[ptr]\n"
-		"	cbnz	%w[loop], 1b"
-		: [loop] "=&r"(loop), [ret] "=&r"(ret),
-		  [ptr] "+Q"(*(u32 *)ptr)
-		: [val] "r" (val));
-		break;
-	case 8:
-		asm ("//__percpu_xchg_8\n"
-		"1:	ldxr	%[ret], %[ptr]\n"
-		"	stxr	%w[loop], %[val], %[ptr]\n"
-		"	cbnz	%w[loop], 1b"
-		: [loop] "=&r"(loop), [ret] "=&r"(ret),
-		  [ptr] "+Q"(*(u64 *)ptr)
-		: [val] "r" (val));
-		break;
-	default:
-		ret = 0;
-		BUILD_BUG();
-	}
-
-	return ret;
+#define __PERCPU_RET_OP_CASE(w, sfx, name, sz, op_llsc, op_lse)		\
+static inline u##sz							\
+__percpu_##name##_return_case_##sz(void *ptr, unsigned long val)	\
+{									\
+	unsigned int loop;						\
+	u##sz ret;							\
+									\
+	asm volatile (ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"1:	ldxr" #sfx "\t%" #w "[ret], %[ptr]\n"			\
+		#op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n"	\
+	"	stxr" #sfx "\t%w[loop], %" #w "[ret], %[ptr]\n"		\
+	"	cbnz	%w[loop], 1b",					\
+	/* LSE atomics */						\
+		#op_lse "\t%" #w "[ret], %" #w "[val], %[ptr]\n"	\
+		#op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n"	\
+		__nops(2))						\
+	: [loop] "=&r" (loop), [ret] "=&r" (ret),			\
+	  [ptr] "+Q"(*(u##sz *)ptr)					\
+	: [val] "r" ((u##sz)(val)));					\
+									\
+	return ret;							\
 }
 
-/* this_cpu_cmpxchg */
-#define _protect_cmpxchg_local(pcp, o, n)			\
-({								\
-	typeof(*raw_cpu_ptr(&(pcp))) __ret;			\
-	preempt_disable();					\
-	__ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n);	\
-	preempt_enable();					\
-	__ret;							\
-})
-
-#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define PERCPU_OP(name, op_llsc, op_lse)				\
+	__PERCPU_OP_CASE(w, b, name,  8, op_llsc, op_lse)		\
+	__PERCPU_OP_CASE(w, h, name, 16, op_llsc, op_lse)		\
+	__PERCPU_OP_CASE(w,  , name, 32, op_llsc, op_lse)		\
+	__PERCPU_OP_CASE( ,  , name, 64, op_llsc, op_lse)
+
+#define PERCPU_RET_OP(name, op_llsc, op_lse)				\
+	__PERCPU_RET_OP_CASE(w, b, name,  8, op_llsc, op_lse)		\
+	__PERCPU_RET_OP_CASE(w, h, name, 16, op_llsc, op_lse)		\
+	__PERCPU_RET_OP_CASE(w,  , name, 32, op_llsc, op_lse)		\
+	__PERCPU_RET_OP_CASE( ,  , name, 64, op_llsc, op_lse)
+
+PERCPU_RW_OPS(8)
+PERCPU_RW_OPS(16)
+PERCPU_RW_OPS(32)
+PERCPU_RW_OPS(64)
+PERCPU_OP(add, add, stadd)
+PERCPU_OP(andnot, bic, stclr)
+PERCPU_OP(or, orr, stset)
+PERCPU_RET_OP(add, add, ldadd)
+
+#undef PERCPU_RW_OPS
+#undef __PERCPU_OP_CASE
+#undef __PERCPU_RET_OP_CASE
+#undef PERCPU_OP
+#undef PERCPU_RET_OP
 
+/*
+ * It would be nice to avoid the conditional call into the scheduler when
+ * re-enabling preemption for preemptible kernels, but doing that in a way
+ * which builds inside a module would mean messing directly with the preempt
+ * count. If you do this, peterz and tglx will hunt you down.
+ */
 #define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2)		\
 ({									\
 	int __ret;							\
-	preempt_disable();						\
+	preempt_disable_notrace();					\
 	__ret = cmpxchg_double_local(	raw_cpu_ptr(&(ptr1)),		\
 					raw_cpu_ptr(&(ptr2)),		\
 					o1, o2, n1, n2);		\
-	preempt_enable();						\
+	preempt_enable_notrace();					\
 	__ret;								\
 })
 
-#define _percpu_read(pcp)						\
+#define _pcp_protect(op, pcp, ...)					\
 ({									\
-	typeof(pcp) __retval;						\
 	preempt_disable_notrace();					\
-	__retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), 	\
-					      sizeof(pcp));		\
+	op(raw_cpu_ptr(&(pcp)), __VA_ARGS__);				\
 	preempt_enable_notrace();					\
-	__retval;							\
 })
 
-#define _percpu_write(pcp, val)						\
-do {									\
+#define _pcp_protect_return(op, pcp, args...)				\
+({									\
+	typeof(pcp) __retval;						\
 	preempt_disable_notrace();					\
-	__percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), 	\
-				sizeof(pcp));				\
+	__retval = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##args);	\
 	preempt_enable_notrace();					\
-} while(0)								\
-
-#define _pcp_protect(operation, pcp, val)			\
-({								\
-	typeof(pcp) __retval;					\
-	preempt_disable();					\
-	__retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)),	\
-					  (val), sizeof(pcp));	\
-	preempt_enable();					\
-	__retval;						\
+	__retval;							\
 })
 
-#define _percpu_add(pcp, val) \
-	_pcp_protect(__percpu_add, pcp, val)
-
-#define _percpu_add_return(pcp, val) _percpu_add(pcp, val)
-
-#define _percpu_and(pcp, val) \
-	_pcp_protect(__percpu_and, pcp, val)
-
-#define _percpu_or(pcp, val) \
-	_pcp_protect(__percpu_or, pcp, val)
-
-#define _percpu_xchg(pcp, val) (typeof(pcp)) \
-	_pcp_protect(__percpu_xchg, pcp, (unsigned long)(val))
-
-#define this_cpu_add_1(pcp, val) _percpu_add(pcp, val)
-#define this_cpu_add_2(pcp, val) _percpu_add(pcp, val)
-#define this_cpu_add_4(pcp, val) _percpu_add(pcp, val)
-#define this_cpu_add_8(pcp, val) _percpu_add(pcp, val)
-
-#define this_cpu_add_return_1(pcp, val) _percpu_add_return(pcp, val)
-#define this_cpu_add_return_2(pcp, val) _percpu_add_return(pcp, val)
-#define this_cpu_add_return_4(pcp, val) _percpu_add_return(pcp, val)
-#define this_cpu_add_return_8(pcp, val) _percpu_add_return(pcp, val)
-
-#define this_cpu_and_1(pcp, val) _percpu_and(pcp, val)
-#define this_cpu_and_2(pcp, val) _percpu_and(pcp, val)
-#define this_cpu_and_4(pcp, val) _percpu_and(pcp, val)
-#define this_cpu_and_8(pcp, val) _percpu_and(pcp, val)
-
-#define this_cpu_or_1(pcp, val) _percpu_or(pcp, val)
-#define this_cpu_or_2(pcp, val) _percpu_or(pcp, val)
-#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val)
-#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val)
-
-#define this_cpu_read_1(pcp) _percpu_read(pcp)
-#define this_cpu_read_2(pcp) _percpu_read(pcp)
-#define this_cpu_read_4(pcp) _percpu_read(pcp)
-#define this_cpu_read_8(pcp) _percpu_read(pcp)
-
-#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val)
-#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val)
-#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val)
-#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val)
-
-#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val)
-#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val)
-#define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val)
-#define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val)
+#define this_cpu_read_1(pcp)		\
+	_pcp_protect_return(__percpu_read_8, pcp)
+#define this_cpu_read_2(pcp)		\
+	_pcp_protect_return(__percpu_read_16, pcp)
+#define this_cpu_read_4(pcp)		\
+	_pcp_protect_return(__percpu_read_32, pcp)
+#define this_cpu_read_8(pcp)		\
+	_pcp_protect_return(__percpu_read_64, pcp)
+
+#define this_cpu_write_1(pcp, val)	\
+	_pcp_protect(__percpu_write_8, pcp, (unsigned long)val)
+#define this_cpu_write_2(pcp, val)	\
+	_pcp_protect(__percpu_write_16, pcp, (unsigned long)val)
+#define this_cpu_write_4(pcp, val)	\
+	_pcp_protect(__percpu_write_32, pcp, (unsigned long)val)
+#define this_cpu_write_8(pcp, val)	\
+	_pcp_protect(__percpu_write_64, pcp, (unsigned long)val)
+
+#define this_cpu_add_1(pcp, val)	\
+	_pcp_protect(__percpu_add_case_8, pcp, val)
+#define this_cpu_add_2(pcp, val)	\
+	_pcp_protect(__percpu_add_case_16, pcp, val)
+#define this_cpu_add_4(pcp, val)	\
+	_pcp_protect(__percpu_add_case_32, pcp, val)
+#define this_cpu_add_8(pcp, val)	\
+	_pcp_protect(__percpu_add_case_64, pcp, val)
+
+#define this_cpu_add_return_1(pcp, val)	\
+	_pcp_protect_return(__percpu_add_return_case_8, pcp, val)
+#define this_cpu_add_return_2(pcp, val)	\
+	_pcp_protect_return(__percpu_add_return_case_16, pcp, val)
+#define this_cpu_add_return_4(pcp, val)	\
+	_pcp_protect_return(__percpu_add_return_case_32, pcp, val)
+#define this_cpu_add_return_8(pcp, val)	\
+	_pcp_protect_return(__percpu_add_return_case_64, pcp, val)
+
+#define this_cpu_and_1(pcp, val)	\
+	_pcp_protect(__percpu_andnot_case_8, pcp, ~val)
+#define this_cpu_and_2(pcp, val)	\
+	_pcp_protect(__percpu_andnot_case_16, pcp, ~val)
+#define this_cpu_and_4(pcp, val)	\
+	_pcp_protect(__percpu_andnot_case_32, pcp, ~val)
+#define this_cpu_and_8(pcp, val)	\
+	_pcp_protect(__percpu_andnot_case_64, pcp, ~val)
+
+#define this_cpu_or_1(pcp, val)		\
+	_pcp_protect(__percpu_or_case_8, pcp, val)
+#define this_cpu_or_2(pcp, val)		\
+	_pcp_protect(__percpu_or_case_16, pcp, val)
+#define this_cpu_or_4(pcp, val)		\
+	_pcp_protect(__percpu_or_case_32, pcp, val)
+#define this_cpu_or_8(pcp, val)		\
+	_pcp_protect(__percpu_or_case_64, pcp, val)
+
+#define this_cpu_xchg_1(pcp, val)	\
+	_pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_2(pcp, val)	\
+	_pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_4(pcp, val)	\
+	_pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_8(pcp, val)	\
+	_pcp_protect_return(xchg_relaxed, pcp, val)
+
+#define this_cpu_cmpxchg_1(pcp, o, n)	\
+	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_2(pcp, o, n)	\
+	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_4(pcp, o, n)	\
+	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_8(pcp, o, n)	\
+	_pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
 
 #include <asm-generic/percpu.h>
 
-- 
cgit 


From 4230509978f2921182da4e9197964dccdbe463c3 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 18 Sep 2018 09:39:55 +0100
Subject: arm64: cmpxchg: Use "K" instead of "L" for ll/sc immediate constraint

The "L" AArch64 machine constraint, which we use for the "old" value in
an LL/SC cmpxchg(), generates an immediate that is suitable for a 64-bit
logical instruction. However, for cmpxchg() operations on types smaller
than 64 bits, this constraint can result in an invalid instruction which
is correctly rejected by GAS, such as EOR W1, W1, #0xffffffff.

Whilst we could special-case the constraint based on the cmpxchg size,
it's far easier to change the constraint to "K" and put up with using
a register for large 64-bit immediates. For out-of-line LL/SC atomics,
this is all moot anyway.

Reported-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/atomic_ll_sc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index b53f70dd6e10..af7b99005453 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -276,7 +276,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr,		\
 	"2:"								\
 	: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),			\
 	  [v] "+Q" (*(u##sz *)ptr)					\
-	: [old] "Lr" (old), [new] "r" (new)				\
+	: [old] "Kr" (old), [new] "r" (new)				\
 	: cl);								\
 									\
 	return oldval;							\
-- 
cgit 


From 386b3c7bdafcc67aaf4168e9627b381370b6538a Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 7 Dec 2018 18:08:16 +0000
Subject: arm64: add EXPORT_SYMBOL_NOKASAN()

So that we can export symbols directly from assembly files, let's make
use of the generic <asm/export.h>. We have a few symbols that we'll want
to conditionally export for !KASAN kernel builds, so we add a helper for
that in <asm/assembler.h>.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/assembler.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 953208267252..d8b9d3a427d1 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -23,6 +23,8 @@
 #ifndef __ASM_ASSEMBLER_H
 #define __ASM_ASSEMBLER_H
 
+#include <asm-generic/export.h>
+
 #include <asm/asm-offsets.h>
 #include <asm/cpufeature.h>
 #include <asm/debug-monitors.h>
@@ -490,6 +492,13 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
 #else
 #define NOKPROBE(x)
 #endif
+
+#ifdef CONFIG_KASAN
+#define EXPORT_SYMBOL_NOKASAN(name)
+#else
+#define EXPORT_SYMBOL_NOKASAN(name)	EXPORT_SYMBOL(name)
+#endif
+
 	/*
 	 * Emit a 64-bit absolute little endian symbol reference in a way that
 	 * ensures that it will be resolved at build time, even when building a
-- 
cgit 


From 33e5f4e50917c2508c05898f391a971b15eec93e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 6 Dec 2018 17:31:20 +0000
Subject: KVM: arm64: Rework detection of SVE, !VHE systems

An SVE system is so far the only case where we mandate VHE. As we're
starting to grow this requirements, let's slightly rework the way we
deal with that situation, allowing for easy extension of this check.

Acked-by: Christoffer Dall <christoffer.dall@arm.com>
Reviewed-by: James Morse <james.morse@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/include/asm/kvm_host.h   | 2 +-
 arch/arm64/include/asm/kvm_host.h | 6 +++---
 virt/kvm/arm/arm.c                | 8 ++++----
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 5ca5d9af0c26..2184d9ddb418 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -285,7 +285,7 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
 
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
-static inline bool kvm_arch_check_sve_has_vhe(void) { return true; }
+static inline bool kvm_arch_requires_vhe(void) { return false; }
 static inline void kvm_arch_hardware_unsetup(void) {}
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 52fbc823ff8c..d6d9aa76a943 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -422,7 +422,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
 	}
 }
 
-static inline bool kvm_arch_check_sve_has_vhe(void)
+static inline bool kvm_arch_requires_vhe(void)
 {
 	/*
 	 * The Arm architecture specifies that implementation of SVE
@@ -430,9 +430,9 @@ static inline bool kvm_arch_check_sve_has_vhe(void)
 	 * relies on this when SVE is present:
 	 */
 	if (system_supports_sve())
-		return has_vhe();
-	else
 		return true;
+
+	return false;
 }
 
 static inline void kvm_arch_hardware_unsetup(void) {}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 23774970c9df..36165748a315 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1640,8 +1640,10 @@ int kvm_arch_init(void *opaque)
 		return -ENODEV;
 	}
 
-	if (!kvm_arch_check_sve_has_vhe()) {
-		kvm_pr_unimpl("SVE system without VHE unsupported.  Broken cpu?");
+	in_hyp_mode = is_kernel_in_hyp_mode();
+
+	if (!in_hyp_mode && kvm_arch_requires_vhe()) {
+		kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n");
 		return -ENODEV;
 	}
 
@@ -1657,8 +1659,6 @@ int kvm_arch_init(void *opaque)
 	if (err)
 		return err;
 
-	in_hyp_mode = is_kernel_in_hyp_mode();
-
 	if (!in_hyp_mode) {
 		err = init_hyp_mode();
 		if (err)
-- 
cgit 


From 793d5d9213c701fcef8823e053ecd6919d1e4c21 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 6 Dec 2018 17:31:22 +0000
Subject: arm64: Add TCR_EPD{0,1} definitions

We are soon going to play with TCR_EL1.EPD{0,1}, so let's add the
relevant definitions.

Reviewed-by: James Morse <james.morse@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/pgtable-hwdef.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 1d7d8da2ef9b..a7d5d6e459eb 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -224,6 +224,8 @@
 #define TCR_TxSZ_WIDTH		6
 #define TCR_T0SZ_MASK		(((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET)
 
+#define TCR_EPD0_SHIFT		7
+#define TCR_EPD0_MASK		(UL(1) << TCR_EPD0_SHIFT)
 #define TCR_IRGN0_SHIFT		8
 #define TCR_IRGN0_MASK		(UL(3) << TCR_IRGN0_SHIFT)
 #define TCR_IRGN0_NC		(UL(0) << TCR_IRGN0_SHIFT)
@@ -231,6 +233,8 @@
 #define TCR_IRGN0_WT		(UL(2) << TCR_IRGN0_SHIFT)
 #define TCR_IRGN0_WBnWA		(UL(3) << TCR_IRGN0_SHIFT)
 
+#define TCR_EPD1_SHIFT		23
+#define TCR_EPD1_MASK		(UL(1) << TCR_EPD1_SHIFT)
 #define TCR_IRGN1_SHIFT		24
 #define TCR_IRGN1_MASK		(UL(3) << TCR_IRGN1_SHIFT)
 #define TCR_IRGN1_NC		(UL(0) << TCR_IRGN1_SHIFT)
-- 
cgit 


From 8b2cca9ade2c0f1d2ba94e39781e7306c918e544 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 6 Dec 2018 17:31:23 +0000
Subject: arm64: KVM: Force VHE for systems affected by erratum 1165522

In order to easily mitigate ARM erratum 1165522, we need to force
affected CPUs to run in VHE mode if using KVM.

Reviewed-by: James Morse <james.morse@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cpucaps.h  | 3 ++-
 arch/arm64/include/asm/kvm_host.h | 4 ++++
 arch/arm64/kernel/cpu_errata.c    | 8 ++++++++
 3 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 6e2d254c09eb..62d8cd15fdf2 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -54,7 +54,8 @@
 #define ARM64_HAS_CRC32				33
 #define ARM64_SSBS				34
 #define ARM64_WORKAROUND_1188873		35
+#define ARM64_WORKAROUND_1165522		36
 
-#define ARM64_NCAPS				36
+#define ARM64_NCAPS				37
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d6d9aa76a943..9217759afa6b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -432,6 +432,10 @@ static inline bool kvm_arch_requires_vhe(void)
 	if (system_supports_sve())
 		return true;
 
+	/* Some implementations have defects that confine them to VHE */
+	if (cpus_have_cap(ARM64_WORKAROUND_1165522))
+		return true;
+
 	return false;
 }
 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index a509e35132d2..476e738e6c46 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -739,6 +739,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		.capability = ARM64_WORKAROUND_1188873,
 		ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
 	},
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1165522
+	{
+		/* Cortex-A76 r0p0 to r2p0 */
+		.desc = "ARM erratum 1165522",
+		.capability = ARM64_WORKAROUND_1165522,
+		ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
+	},
 #endif
 	{
 	}
-- 
cgit 


From 1e4448c5ddbe93ab6070160f094f49e7c95477e6 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 6 Dec 2018 17:31:24 +0000
Subject: arm64: KVM: Add synchronization on translation regime change for
 erratum 1165522

In order to ensure that slipping HCR_EL2.TGE is done at the right
time when switching translation regime, let insert the required ISBs
that will be patched in when erratum 1165522 is detected.

Take this opportunity to add the missing include of asm/alternative.h
which was getting there by pure luck.

Acked-by: Christoffer Dall <christoffer.dall@arm.com>
Reviewed-by: James Morse <james.morse@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/kvm_hyp.h |  8 ++++++++
 arch/arm64/kvm/hyp/switch.c      | 19 +++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 23aca66767f9..a80a7ef57325 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -20,6 +20,7 @@
 
 #include <linux/compiler.h>
 #include <linux/kvm_host.h>
+#include <asm/alternative.h>
 #include <asm/sysreg.h>
 
 #define __hyp_text __section(.hyp.text) notrace
@@ -163,6 +164,13 @@ static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm)
 {
 	write_sysreg(kvm->arch.vtcr, vtcr_el2);
 	write_sysreg(kvm->arch.vttbr, vttbr_el2);
+
+	/*
+	 * ARM erratum 1165522 requires the actual execution of the above
+	 * before we can switch to the EL1/EL0 translation regime used by
+	 * the guest.
+	 */
+	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522));
 }
 
 #endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index a8fa61c68c32..31ee0bfc432f 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -143,6 +143,14 @@ static void deactivate_traps_vhe(void)
 {
 	extern char vectors[];	/* kernel exception vectors */
 	write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
+
+	/*
+	 * ARM erratum 1165522 requires the actual execution of the above
+	 * before we can switch to the EL2/EL0 translation regime used by
+	 * the host.
+	 */
+	asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_1165522));
+
 	write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
 	write_sysreg(vectors, vbar_el1);
 }
@@ -499,6 +507,17 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 
 	sysreg_save_host_state_vhe(host_ctxt);
 
+	/*
+	 * ARM erratum 1165522 requires us to configure both stage 1 and
+	 * stage 2 translation for the guest context before we clear
+	 * HCR_EL2.TGE.
+	 *
+	 * We have already configured the guest's stage 1 translation in
+	 * kvm_vcpu_load_sysregs above.  We must now call __activate_vm
+	 * before __activate_traps, because __activate_vm configures
+	 * stage 2 translation, and __activate_traps clear HCR_EL2.TGE
+	 * (among other things).
+	 */
 	__activate_vm(vcpu->kvm);
 	__activate_traps(vcpu);
 
-- 
cgit 


From 33309ecda0070506c49182530abe7728850ebe78 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 10 Dec 2018 13:39:48 +0000
Subject: arm64: Fix minor issues with the dcache_by_line_op macro

The dcache_by_line_op macro suffers from a couple of small problems:

First, the GAS directives that are currently being used rely on
assembler behavior that is not documented, and probably not guaranteed
to produce the correct behavior going forward. As a result, we end up
with some undefined symbols in cache.o:

$ nm arch/arm64/mm/cache.o
         ...
         U civac
         ...
         U cvac
         U cvap
         U cvau

This is due to the fact that the comparisons used to select the
operation type in the dcache_by_line_op macro are comparing symbols
not strings, and even though it seems that GAS is doing the right
thing here (undefined symbols by the same name are equal to each
other), it seems unwise to rely on this.

Second, when patching in a DC CVAP instruction on CPUs that support it,
the fallback path consists of a DC CVAU instruction which may be
affected by CPU errata that require ARM64_WORKAROUND_CLEAN_CACHE.

Solve these issues by unrolling the various maintenance routines and
using the conditional directives that are documented as operating on
strings. To avoid the complexity of nested alternatives, we move the
DC CVAP patching to __clean_dcache_area_pop, falling back to a branch
to __clean_dcache_area_poc if DCPOP is not supported by the CPU.

Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Suggested-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/assembler.h | 30 ++++++++++++++++++------------
 arch/arm64/mm/cache.S              |  3 +++
 2 files changed, 21 insertions(+), 12 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index d8b9d3a427d1..d103c3ee7335 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -392,27 +392,33 @@ alternative_endif
  * 	size:		size of the region
  * 	Corrupts:	kaddr, size, tmp1, tmp2
  */
+	.macro __dcache_op_workaround_clean_cache, op, kaddr
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+	dc	\op, \kaddr
+alternative_else
+	dc	civac, \kaddr
+alternative_endif
+	.endm
+
 	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
 	dcache_line_size \tmp1, \tmp2
 	add	\size, \kaddr, \size
 	sub	\tmp2, \tmp1, #1
 	bic	\kaddr, \kaddr, \tmp2
 9998:
-	.if	(\op == cvau || \op == cvac)
-alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
-	dc	\op, \kaddr
-alternative_else
-	dc	civac, \kaddr
-alternative_endif
-	.elseif	(\op == cvap)
-alternative_if ARM64_HAS_DCPOP
-	sys 3, c7, c12, 1, \kaddr	// dc cvap
-alternative_else
-	dc	cvac, \kaddr
-alternative_endif
+	.ifc	\op, cvau
+	__dcache_op_workaround_clean_cache \op, \kaddr
+	.else
+	.ifc	\op, cvac
+	__dcache_op_workaround_clean_cache \op, \kaddr
+	.else
+	.ifc	\op, cvap
+	sys	3, c7, c12, 1, \kaddr	// dc cvap
 	.else
 	dc	\op, \kaddr
 	.endif
+	.endif
+	.endif
 	add	\kaddr, \kaddr, \tmp1
 	cmp	\kaddr, \size
 	b.lo	9998b
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 0c22ede52f90..a194fd0e837f 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -212,6 +212,9 @@ ENDPROC(__dma_clean_area)
  *	- size    - size in question
  */
 ENTRY(__clean_dcache_area_pop)
+	alternative_if_not ARM64_HAS_DCPOP
+	b	__clean_dcache_area_poc
+	alternative_else_nop_endif
 	dcache_by_line_op cvap, sy, x0, x1, x2, x3
 	ret
 ENDPIPROC(__clean_dcache_area_pop)
-- 
cgit 


From 6e8830674ea77f57d57a33cca09083b117a71f41 Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@lca.pw>
Date: Fri, 7 Dec 2018 17:34:49 -0500
Subject: arm64: kasan: Increase stack size for KASAN_EXTRA

If the kernel is configured with KASAN_EXTRA, the stack size is
increased significantly due to setting the GCC -fstack-reuse option to
"none" [1]. As a result, it can trigger a stack overrun quite often with
32k stack size compiled using GCC 8. For example, this reproducer

  https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/madvise/madvise06.c

can trigger a "corrupted stack end detected inside scheduler" very
reliably with CONFIG_SCHED_STACK_END_CHECK enabled. There are other
reports at:

  https://lore.kernel.org/lkml/1542144497.12945.29.camel@gmx.us/
  https://lore.kernel.org/lkml/721E7B42-2D55-4866-9C1A-3E8D64F33F9C@gmx.us/

There are just too many functions that could have a large stack with
KASAN_EXTRA due to large local variables that have been called over and
over again without being able to reuse the stacks. Some noticiable ones
are,

size
7536 shrink_inactive_list
7440 shrink_page_list
6560 fscache_stats_show
3920 jbd2_journal_commit_transaction
3216 try_to_unmap_one
3072 migrate_page_move_mapping
3584 migrate_misplaced_transhuge_page
3920 ip_vs_lblcr_schedule
4304 lpfc_nvme_info_show
3888 lpfc_debugfs_nvmestat_data.constprop

There are other 49 functions over 2k in size while compiling kernel with
"-Wframe-larger-than=" on this machine. Hence, it is too much work to
change Makefiles for each object to compile without
-fsanitize-address-use-after-scope individually.

[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715#c23

Signed-off-by: Qian Cai <cai@lca.pw>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/memory.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index b96442960aea..56562ff01076 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -76,12 +76,17 @@
 /*
  * KASAN requires 1/8th of the kernel virtual address space for the shadow
  * region. KASAN can bloat the stack significantly, so double the (minimum)
- * stack size when KASAN is in use.
+ * stack size when KASAN is in use, and then double it again if KASAN_EXTRA is
+ * on.
  */
 #ifdef CONFIG_KASAN
 #define KASAN_SHADOW_SCALE_SHIFT 3
 #define KASAN_SHADOW_SIZE	(UL(1) << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT))
+#ifdef CONFIG_KASAN_EXTRA
+#define KASAN_THREAD_SHIFT	2
+#else
 #define KASAN_THREAD_SHIFT	1
+#endif /* CONFIG_KASAN_EXTRA */
 #else
 #define KASAN_SHADOW_SIZE	(0)
 #define KASAN_THREAD_SHIFT	0
-- 
cgit 


From 363524d2b12270d86677e1154ecc1c5061f43219 Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@arm.com>
Date: Thu, 6 Dec 2018 22:50:37 +0000
Subject: arm64: mm: Introduce DEFAULT_MAP_WINDOW

We wish to introduce a 52-bit virtual address space for userspace but
maintain compatibility with software that assumes the maximum VA space
size is 48 bit.

In order to achieve this, on 52-bit VA systems, we make mmap behave as
if it were running on a 48-bit VA system (unless userspace explicitly
requests a VA where addr[51:48] != 0).

On a system running a 52-bit userspace we need TASK_SIZE to represent
the 52-bit limit as it is used in various places to distinguish between
kernelspace and userspace addresses.

Thus we need a new limit for mmap, stack, ELF loader and EFI (which uses
TTBR0) to represent the non-extended VA space.

This patch introduces DEFAULT_MAP_WINDOW and DEFAULT_MAP_WINDOW_64 and
switches the appropriate logic to use that instead of TASK_SIZE.

Signed-off-by: Steve Capper <steve.capper@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/elf.h            |  2 +-
 arch/arm64/include/asm/processor.h      | 10 ++++++++--
 arch/arm64/mm/init.c                    |  2 +-
 drivers/firmware/efi/arm-runtime.c      |  2 +-
 drivers/firmware/efi/libstub/arm-stub.c |  2 +-
 5 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 433b9554c6a1..bc9bd9e77d9d 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -117,7 +117,7 @@
  * 64-bit, this is above 4GB to leave the entire 32-bit address
  * space open for things that want to use the area for 32-bit pointers.
  */
-#define ELF_ET_DYN_BASE		(2 * TASK_SIZE_64 / 3)
+#define ELF_ET_DYN_BASE		(2 * DEFAULT_MAP_WINDOW_64 / 3)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 6b0d4dff5012..102bab4344b7 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -53,19 +53,25 @@
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
  */
+
+#define DEFAULT_MAP_WINDOW_64	(UL(1) << VA_BITS)
+
 #ifdef CONFIG_COMPAT
 #define TASK_SIZE_32		UL(0x100000000)
 #define TASK_SIZE		(test_thread_flag(TIF_32BIT) ? \
 				TASK_SIZE_32 : TASK_SIZE_64)
 #define TASK_SIZE_OF(tsk)	(test_tsk_thread_flag(tsk, TIF_32BIT) ? \
 				TASK_SIZE_32 : TASK_SIZE_64)
+#define DEFAULT_MAP_WINDOW	(test_thread_flag(TIF_32BIT) ? \
+				TASK_SIZE_32 : DEFAULT_MAP_WINDOW_64)
 #else
 #define TASK_SIZE		TASK_SIZE_64
+#define DEFAULT_MAP_WINDOW	DEFAULT_MAP_WINDOW_64
 #endif /* CONFIG_COMPAT */
 
-#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 4))
+#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(DEFAULT_MAP_WINDOW / 4))
+#define STACK_TOP_MAX		DEFAULT_MAP_WINDOW_64
 
-#define STACK_TOP_MAX		TASK_SIZE_64
 #ifdef CONFIG_COMPAT
 #define AARCH32_VECTORS_BASE	0xffff0000
 #define STACK_TOP		(test_thread_flag(TIF_32BIT) ? \
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 2983e0fc1786..6cde00554e9b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -609,7 +609,7 @@ void __init mem_init(void)
 	 * detected at build time already.
 	 */
 #ifdef CONFIG_COMPAT
-	BUILD_BUG_ON(TASK_SIZE_32			> TASK_SIZE_64);
+	BUILD_BUG_ON(TASK_SIZE_32 > DEFAULT_MAP_WINDOW_64);
 #endif
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index a00934d263c5..23ea1ed409d1 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -38,7 +38,7 @@ static struct ptdump_info efi_ptdump_info = {
 	.mm		= &efi_mm,
 	.markers	= (struct addr_marker[]){
 		{ 0,		"UEFI runtime start" },
-		{ TASK_SIZE_64,	"UEFI runtime end" }
+		{ DEFAULT_MAP_WINDOW_64, "UEFI runtime end" }
 	},
 	.base_addr	= 0,
 };
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 3d36142cf812..6640942a1c0d 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -33,7 +33,7 @@
 #define EFI_RT_VIRTUAL_SIZE	SZ_512M
 
 #ifdef CONFIG_ARM64
-# define EFI_RT_VIRTUAL_LIMIT	TASK_SIZE_64
+# define EFI_RT_VIRTUAL_LIMIT	DEFAULT_MAP_WINDOW_64
 #else
 # define EFI_RT_VIRTUAL_LIMIT	TASK_SIZE
 #endif
-- 
cgit 


From e5d99157459f79ed26953e8a77addb9d3e0122b7 Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@arm.com>
Date: Thu, 6 Dec 2018 22:50:38 +0000
Subject: arm64: mm: Define arch_get_mmap_end, arch_get_mmap_base

Now that we have DEFAULT_MAP_WINDOW defined, we can arch_get_mmap_end
and arch_get_mmap_base helpers to allow for high addresses in mmap.

Signed-off-by: Steve Capper <steve.capper@arm.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/processor.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 102bab4344b7..759927faf7f6 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -80,6 +80,13 @@
 #define STACK_TOP		STACK_TOP_MAX
 #endif /* CONFIG_COMPAT */
 
+#define arch_get_mmap_end(addr) ((addr > DEFAULT_MAP_WINDOW) ? TASK_SIZE :\
+				DEFAULT_MAP_WINDOW)
+
+#define arch_get_mmap_base(addr, base) ((addr > DEFAULT_MAP_WINDOW) ? \
+					base + TASK_SIZE - DEFAULT_MAP_WINDOW :\
+					base)
+
 extern phys_addr_t arm64_dma_phys_limit;
 #define ARCH_LOW_ADDRESS_LIMIT	(arm64_dma_phys_limit - 1)
 
-- 
cgit 


From e842dfb5a2d3b4c43766508ef89a4eb67471d53a Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@arm.com>
Date: Thu, 6 Dec 2018 22:50:39 +0000
Subject: arm64: mm: Offset TTBR1 to allow 52-bit PTRS_PER_PGD

Enabling 52-bit VAs on arm64 requires that the PGD table expands from 64
entries (for the 48-bit case) to 1024 entries. This quantity,
PTRS_PER_PGD is used as follows to compute which PGD entry corresponds
to a given virtual address, addr:

pgd_index(addr) -> (addr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)

Userspace addresses are prefixed by 0's, so for a 48-bit userspace
address, uva, the following is true:
(uva >> PGDIR_SHIFT) & (1024 - 1) == (uva >> PGDIR_SHIFT) & (64 - 1)

In other words, a 48-bit userspace address will have the same pgd_index
when using PTRS_PER_PGD = 64 and 1024.

Kernel addresses are prefixed by 1's so, given a 48-bit kernel address,
kva, we have the following inequality:
(kva >> PGDIR_SHIFT) & (1024 - 1) != (kva >> PGDIR_SHIFT) & (64 - 1)

In other words a 48-bit kernel virtual address will have a different
pgd_index when using PTRS_PER_PGD = 64 and 1024.

If, however, we note that:
kva = 0xFFFF << 48 + lower (where lower[63:48] == 0b)
and, PGDIR_SHIFT = 42 (as we are dealing with 64KB PAGE_SIZE)

We can consider:
(kva >> PGDIR_SHIFT) & (1024 - 1) - (kva >> PGDIR_SHIFT) & (64 - 1)
 = (0xFFFF << 6) & 0x3FF - (0xFFFF << 6) & 0x3F	// "lower" cancels out
 = 0x3C0

In other words, one can switch PTRS_PER_PGD to the 52-bit value globally
provided that they increment ttbr1_el1 by 0x3C0 * 8 = 0x1E00 bytes when
running with 48-bit kernel VAs (TCR_EL1.T1SZ = 16).

For kernel configuration where 52-bit userspace VAs are possible, this
patch offsets ttbr1_el1 and sets PTRS_PER_PGD corresponding to the
52-bit value.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Steve Capper <steve.capper@arm.com>
[will: added comment to TTBR1_BADDR_4852_OFFSET calculation]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/assembler.h     | 23 +++++++++++++++++++++++
 arch/arm64/include/asm/pgtable-hwdef.h | 10 ++++++++++
 arch/arm64/kernel/head.S               |  1 +
 arch/arm64/kernel/hibernate-asm.S      |  1 +
 arch/arm64/mm/proc.S                   |  4 ++++
 5 files changed, 39 insertions(+)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index d103c3ee7335..ba609e0439e8 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -543,6 +543,29 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
 	mrs	\rd, sp_el0
 	.endm
 
+/*
+ * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
+ * orr is used as it can cover the immediate value (and is idempotent).
+ * In future this may be nop'ed out when dealing with 52-bit kernel VAs.
+ * 	ttbr: Value of ttbr to set, modified.
+ */
+	.macro	offset_ttbr1, ttbr
+#ifdef CONFIG_ARM64_52BIT_VA
+	orr	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
+#endif
+	.endm
+
+/*
+ * Perform the reverse of offset_ttbr1.
+ * bic is used as it can cover the immediate value and, in future, won't need
+ * to be nop'ed out when dealing with 52-bit kernel VAs.
+ */
+	.macro	restore_ttbr1, ttbr
+#ifdef CONFIG_ARM64_52BIT_VA
+	bic	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
+#endif
+	.endm
+
 /*
  * Arrange a physical address in a TTBR register, taking care of 52-bit
  * addresses.
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 1d7d8da2ef9b..d5219f2624b7 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -80,7 +80,11 @@
 #define PGDIR_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
 #define PGDIR_SIZE		(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK		(~(PGDIR_SIZE-1))
+#ifdef CONFIG_ARM64_52BIT_VA
+#define PTRS_PER_PGD		(1 << (52 - PGDIR_SHIFT))
+#else
 #define PTRS_PER_PGD		(1 << (VA_BITS - PGDIR_SHIFT))
+#endif
 
 /*
  * Section address mask and size definitions.
@@ -306,4 +310,10 @@
 #define TTBR_BADDR_MASK_52	(((UL(1) << 46) - 1) << 2)
 #endif
 
+#ifdef CONFIG_ARM64_52BIT_VA
+/* Must be at least 64-byte aligned to prevent corruption of the TTBR */
+#define TTBR1_BADDR_4852_OFFSET	(((UL(1) << (52 - PGDIR_SHIFT)) - \
+				 (UL(1) << (48 - PGDIR_SHIFT))) * 8)
+#endif
+
 #endif
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 4471f570a295..f60081be9a1b 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -769,6 +769,7 @@ ENTRY(__enable_mmu)
 	phys_to_ttbr x1, x1
 	phys_to_ttbr x2, x2
 	msr	ttbr0_el1, x2			// load TTBR0
+	offset_ttbr1 x1
 	msr	ttbr1_el1, x1			// load TTBR1
 	isb
 	msr	sctlr_el1, x0
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index dd14ab8c9f72..fe36d85c60bd 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -40,6 +40,7 @@
 	tlbi	vmalle1
 	dsb	nsh
 	phys_to_ttbr \tmp, \page_table
+	offset_ttbr1 \tmp
 	msr	ttbr1_el1, \tmp
 	isb
 .endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 2c75b0b903ae..2db1c491d45d 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -182,6 +182,7 @@ ENDPROC(cpu_do_switch_mm)
 .macro	__idmap_cpu_set_reserved_ttbr1, tmp1, tmp2
 	adrp	\tmp1, empty_zero_page
 	phys_to_ttbr \tmp2, \tmp1
+	offset_ttbr1 \tmp2
 	msr	ttbr1_el1, \tmp2
 	isb
 	tlbi	vmalle1
@@ -200,6 +201,7 @@ ENTRY(idmap_cpu_replace_ttbr1)
 
 	__idmap_cpu_set_reserved_ttbr1 x1, x3
 
+	offset_ttbr1 x0
 	msr	ttbr1_el1, x0
 	isb
 
@@ -254,6 +256,7 @@ ENTRY(idmap_kpti_install_ng_mappings)
 	pte		.req	x16
 
 	mrs	swapper_ttb, ttbr1_el1
+	restore_ttbr1	swapper_ttb
 	adr	flag_ptr, __idmap_kpti_flag
 
 	cbnz	cpu, __idmap_kpti_secondary
@@ -373,6 +376,7 @@ __idmap_kpti_secondary:
 	cbnz	w18, 1b
 
 	/* All done, act like nothing happened */
+	offset_ttbr1 swapper_ttb
 	msr	ttbr1_el1, swapper_ttb
 	isb
 	ret
-- 
cgit 


From 67e7fdfcc6824a4f768d76d89377b33baad58fad Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@arm.com>
Date: Thu, 6 Dec 2018 22:50:41 +0000
Subject: arm64: mm: introduce 52-bit userspace support

On arm64 there is optional support for a 52-bit virtual address space.
To exploit this one has to be running with a 64KB page size and be
running on hardware that supports this.

For an arm64 kernel supporting a 48 bit VA with a 64KB page size,
some changes are needed to support a 52-bit userspace:
 * TCR_EL1.T0SZ needs to be 12 instead of 16,
 * TASK_SIZE needs to reflect the new size.

This patch implements the above when the support for 52-bit VAs is
detected at early boot time.

On arm64 userspace addresses translation is controlled by TTBR0_EL1. As
well as userspace, TTBR0_EL1 controls:
 * The identity mapping,
 * EFI runtime code.

It is possible to run a kernel with an identity mapping that has a
larger VA size than userspace (and for this case __cpu_set_tcr_t0sz()
would set TCR_EL1.T0SZ as appropriate). However, when the conditions for
52-bit userspace are met; it is possible to keep TCR_EL1.T0SZ fixed at
12. Thus in this patch, the TCR_EL1.T0SZ size changing logic is
disabled.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Steve Capper <steve.capper@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Kconfig                   |  4 ++++
 arch/arm64/include/asm/assembler.h   |  7 +++----
 arch/arm64/include/asm/mmu_context.h |  3 +++
 arch/arm64/include/asm/processor.h   | 13 +++++++++----
 arch/arm64/kernel/head.S             | 13 +++++++++++++
 arch/arm64/mm/fault.c                |  2 +-
 arch/arm64/mm/mmu.c                  |  1 +
 arch/arm64/mm/proc.S                 | 10 +++++++++-
 8 files changed, 43 insertions(+), 10 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6d2b25f51bb3..858e353b2f40 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -716,6 +716,10 @@ config ARM64_PA_BITS_52
 
 endchoice
 
+config ARM64_52BIT_VA
+	def_bool y
+	depends on ARM64_VA_BITS_48 && ARM64_64K_PAGES && (ARM64_PAN || !ARM64_SW_TTBR0_PAN)
+
 config ARM64_PA_BITS
 	int
 	default 48 if ARM64_PA_BITS_48
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index ba609e0439e8..122d91d4097a 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -357,11 +357,10 @@ alternative_endif
 	.endm
 
 /*
- * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map
  */
-	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
-	ldr_l	\tmpreg, idmap_t0sz
-	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+	.macro	tcr_set_t0sz, valreg, t0sz
+	bfi	\valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
 	.endm
 
 /*
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index dfcfeffd2080..b0768502fa08 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -74,6 +74,9 @@ extern u64 idmap_ptrs_per_pgd;
 
 static inline bool __cpu_uses_extended_idmap(void)
 {
+	if (IS_ENABLED(CONFIG_ARM64_52BIT_VA))
+		return false;
+
 	return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS));
 }
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 759927faf7f6..7ff75e52b762 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -19,10 +19,12 @@
 #ifndef __ASM_PROCESSOR_H
 #define __ASM_PROCESSOR_H
 
-#define TASK_SIZE_64		(UL(1) << VA_BITS)
-
-#define KERNEL_DS	UL(-1)
-#define USER_DS		(TASK_SIZE_64 - 1)
+#define KERNEL_DS		UL(-1)
+#ifdef CONFIG_ARM64_52BIT_VA
+#define USER_DS			((UL(1) << 52) - 1)
+#else
+#define USER_DS			((UL(1) << VA_BITS) - 1)
+#endif /* CONFIG_ARM64_52BIT_VA */
 
 /*
  * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
@@ -56,6 +58,9 @@
 
 #define DEFAULT_MAP_WINDOW_64	(UL(1) << VA_BITS)
 
+extern u64 vabits_user;
+#define TASK_SIZE_64		(UL(1) << vabits_user)
+
 #ifdef CONFIG_COMPAT
 #define TASK_SIZE_32		UL(0x100000000)
 #define TASK_SIZE		(test_thread_flag(TIF_32BIT) ? \
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 58fcc1edd852..c229d9cfe9bf 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -318,6 +318,19 @@ __create_page_tables:
 	adrp	x0, idmap_pg_dir
 	adrp	x3, __idmap_text_start		// __pa(__idmap_text_start)
 
+#ifdef CONFIG_ARM64_52BIT_VA
+	mrs_s	x6, SYS_ID_AA64MMFR2_EL1
+	and	x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
+	mov	x5, #52
+	cbnz	x6, 1f
+#endif
+	mov	x5, #VA_BITS
+1:
+	adr_l	x6, vabits_user
+	str	x5, [x6]
+	dmb	sy
+	dc	ivac, x6		// Invalidate potentially stale cache line
+
 	/*
 	 * VA_BITS may be too small to allow for an ID mapping to be created
 	 * that covers system RAM if that is located sufficiently high in the
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 7d9571f4ae3d..5fe6d2e40e9b 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -160,7 +160,7 @@ void show_pte(unsigned long addr)
 
 	pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
 		 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
-		 VA_BITS, mm->pgd);
+		 mm == &init_mm ? VA_BITS : (int) vabits_user, mm->pgd);
 	pgdp = pgd_offset(mm, addr);
 	pgd = READ_ONCE(*pgdp);
 	pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e1b2d58a311a..0d3eacc4bfbb 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -52,6 +52,7 @@
 
 u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
 u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
+u64 vabits_user __ro_after_init;
 
 u64 kimage_voffset __ro_after_init;
 EXPORT_SYMBOL(kimage_voffset);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 2db1c491d45d..0cf86b17714c 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -450,7 +450,15 @@ ENTRY(__cpu_setup)
 	ldr	x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
 			TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
 			TCR_TBI0 | TCR_A1
-	tcr_set_idmap_t0sz	x10, x9
+
+#ifdef CONFIG_ARM64_52BIT_VA
+	ldr_l 		x9, vabits_user
+	sub		x9, xzr, x9
+	add		x9, x9, #64
+#else
+	ldr_l		x9, idmap_t0sz
+#endif
+	tcr_set_t0sz	x10, x9
 
 	/*
 	 * Set the IPS bits in TCR_EL1.
-- 
cgit 


From b9567720a1b8e739380e0241413606c056c57859 Mon Sep 17 00:00:00 2001
From: Steve Capper <steve.capper@arm.com>
Date: Thu, 6 Dec 2018 22:50:42 +0000
Subject: arm64: mm: Allow forcing all userspace addresses to 52-bit

On arm64 52-bit VAs are provided to userspace when a hint is supplied to
mmap. This helps maintain compatibility with software that expects at
most 48-bit VAs to be returned.

In order to help identify software that has 48-bit VA assumptions, this
patch allows one to compile a kernel where 52-bit VAs are returned by
default on HW that supports it.

This feature is intended to be for development systems only.

Signed-off-by: Steve Capper <steve.capper@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Kconfig                 | 13 +++++++++++++
 arch/arm64/include/asm/elf.h       |  4 ++++
 arch/arm64/include/asm/processor.h |  9 ++++++++-
 3 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 858e353b2f40..ca1f93233b22 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1186,6 +1186,19 @@ config ARM64_CNP
 	  at runtime, and does not affect PEs that do not implement
 	  this feature.
 
+config ARM64_FORCE_52BIT
+	bool "Force 52-bit virtual addresses for userspace"
+	depends on ARM64_52BIT_VA && EXPERT
+	help
+	  For systems with 52-bit userspace VAs enabled, the kernel will attempt
+	  to maintain compatibility with older software by providing 48-bit VAs
+	  unless a hint is supplied to mmap.
+
+	  This configuration option disables the 48-bit compatibility logic, and
+	  forces all userspace addresses to be 52-bit on HW that supports it. One
+	  should only enable this configuration option for stress testing userspace
+	  memory management code. If unsure say N here.
+
 endmenu
 
 config ARM64_SVE
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index bc9bd9e77d9d..6adc1a90e7e6 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -117,7 +117,11 @@
  * 64-bit, this is above 4GB to leave the entire 32-bit address
  * space open for things that want to use the area for 32-bit pointers.
  */
+#ifdef CONFIG_ARM64_FORCE_52BIT
+#define ELF_ET_DYN_BASE		(2 * TASK_SIZE_64 / 3)
+#else
 #define ELF_ET_DYN_BASE		(2 * DEFAULT_MAP_WINDOW_64 / 3)
+#endif /* CONFIG_ARM64_FORCE_52BIT */
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 7ff75e52b762..efa0210cf927 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -74,8 +74,13 @@ extern u64 vabits_user;
 #define DEFAULT_MAP_WINDOW	DEFAULT_MAP_WINDOW_64
 #endif /* CONFIG_COMPAT */
 
-#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(DEFAULT_MAP_WINDOW / 4))
+#ifdef CONFIG_ARM64_FORCE_52BIT
+#define STACK_TOP_MAX		TASK_SIZE_64
+#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(TASK_SIZE / 4))
+#else
 #define STACK_TOP_MAX		DEFAULT_MAP_WINDOW_64
+#define TASK_UNMAPPED_BASE	(PAGE_ALIGN(DEFAULT_MAP_WINDOW / 4))
+#endif /* CONFIG_ARM64_FORCE_52BIT */
 
 #ifdef CONFIG_COMPAT
 #define AARCH32_VECTORS_BASE	0xffff0000
@@ -85,12 +90,14 @@ extern u64 vabits_user;
 #define STACK_TOP		STACK_TOP_MAX
 #endif /* CONFIG_COMPAT */
 
+#ifndef CONFIG_ARM64_FORCE_52BIT
 #define arch_get_mmap_end(addr) ((addr > DEFAULT_MAP_WINDOW) ? TASK_SIZE :\
 				DEFAULT_MAP_WINDOW)
 
 #define arch_get_mmap_base(addr, base) ((addr > DEFAULT_MAP_WINDOW) ? \
 					base + TASK_SIZE - DEFAULT_MAP_WINDOW :\
 					base)
+#endif /* CONFIG_ARM64_FORCE_52BIT */
 
 extern phys_addr_t arm64_dma_phys_limit;
 #define ARCH_LOW_ADDRESS_LIMIT	(arm64_dma_phys_limit - 1)
-- 
cgit 


From 68d23da4373aba76f5300017c4746440f276698e Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 10 Dec 2018 14:15:15 +0000
Subject: arm64: Kconfig: Re-jig CONFIG options for 52-bit VA

Enabling 52-bit VAs for userspace is pretty confusing, since it requires
you to select "48-bit" virtual addressing in the Kconfig.

Rework the logic so that 52-bit user virtual addressing is advertised in
the "Virtual address space size" choice, along with some help text to
describe its interaction with Pointer Authentication. The EXPERT-only
option to force all user mappings to the 52-bit range is then made
available immediately below the VA size selection.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Kconfig                     | 47 +++++++++++++++++++++-------------
 arch/arm64/include/asm/assembler.h     |  4 +--
 arch/arm64/include/asm/mmu_context.h   |  2 +-
 arch/arm64/include/asm/pgtable-hwdef.h |  4 +--
 arch/arm64/include/asm/processor.h     |  4 +--
 arch/arm64/kernel/head.S               |  4 +--
 arch/arm64/kernel/smp.c                |  2 +-
 arch/arm64/mm/proc.S                   |  4 +--
 8 files changed, 41 insertions(+), 30 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index ca1f93233b22..905ce1653e82 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -682,15 +682,43 @@ config ARM64_VA_BITS_47
 config ARM64_VA_BITS_48
 	bool "48-bit"
 
+config ARM64_USER_VA_BITS_52
+	bool "52-bit (user)"
+	depends on ARM64_64K_PAGES && (ARM64_PAN || !ARM64_SW_TTBR0_PAN)
+	help
+	  Enable 52-bit virtual addressing for userspace when explicitly
+	  requested via a hint to mmap(). The kernel will continue to
+	  use 48-bit virtual addresses for its own mappings.
+
+	  NOTE: Enabling 52-bit virtual addressing in conjunction with
+	  ARMv8.3 Pointer Authentication will result in the PAC being
+	  reduced from 7 bits to 3 bits, which may have a significant
+	  impact on its susceptibility to brute-force attacks.
+
+	  If unsure, select 48-bit virtual addressing instead.
+
 endchoice
 
+config ARM64_FORCE_52BIT
+	bool "Force 52-bit virtual addresses for userspace"
+	depends on ARM64_USER_VA_BITS_52 && EXPERT
+	help
+	  For systems with 52-bit userspace VAs enabled, the kernel will attempt
+	  to maintain compatibility with older software by providing 48-bit VAs
+	  unless a hint is supplied to mmap.
+
+	  This configuration option disables the 48-bit compatibility logic, and
+	  forces all userspace addresses to be 52-bit on HW that supports it. One
+	  should only enable this configuration option for stress testing userspace
+	  memory management code. If unsure say N here.
+
 config ARM64_VA_BITS
 	int
 	default 36 if ARM64_VA_BITS_36
 	default 39 if ARM64_VA_BITS_39
 	default 42 if ARM64_VA_BITS_42
 	default 47 if ARM64_VA_BITS_47
-	default 48 if ARM64_VA_BITS_48
+	default 48 if ARM64_VA_BITS_48 || ARM64_USER_VA_BITS_52
 
 choice
 	prompt "Physical address space size"
@@ -716,10 +744,6 @@ config ARM64_PA_BITS_52
 
 endchoice
 
-config ARM64_52BIT_VA
-	def_bool y
-	depends on ARM64_VA_BITS_48 && ARM64_64K_PAGES && (ARM64_PAN || !ARM64_SW_TTBR0_PAN)
-
 config ARM64_PA_BITS
 	int
 	default 48 if ARM64_PA_BITS_48
@@ -1186,19 +1210,6 @@ config ARM64_CNP
 	  at runtime, and does not affect PEs that do not implement
 	  this feature.
 
-config ARM64_FORCE_52BIT
-	bool "Force 52-bit virtual addresses for userspace"
-	depends on ARM64_52BIT_VA && EXPERT
-	help
-	  For systems with 52-bit userspace VAs enabled, the kernel will attempt
-	  to maintain compatibility with older software by providing 48-bit VAs
-	  unless a hint is supplied to mmap.
-
-	  This configuration option disables the 48-bit compatibility logic, and
-	  forces all userspace addresses to be 52-bit on HW that supports it. One
-	  should only enable this configuration option for stress testing userspace
-	  memory management code. If unsure say N here.
-
 endmenu
 
 config ARM64_SVE
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 122d91d4097a..ce985f13dce5 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -549,7 +549,7 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
  * 	ttbr: Value of ttbr to set, modified.
  */
 	.macro	offset_ttbr1, ttbr
-#ifdef CONFIG_ARM64_52BIT_VA
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
 	orr	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
 #endif
 	.endm
@@ -560,7 +560,7 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
  * to be nop'ed out when dealing with 52-bit kernel VAs.
  */
 	.macro	restore_ttbr1, ttbr
-#ifdef CONFIG_ARM64_52BIT_VA
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
 	bic	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
 #endif
 	.endm
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index b0768502fa08..2da3e478fd8f 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -74,7 +74,7 @@ extern u64 idmap_ptrs_per_pgd;
 
 static inline bool __cpu_uses_extended_idmap(void)
 {
-	if (IS_ENABLED(CONFIG_ARM64_52BIT_VA))
+	if (IS_ENABLED(CONFIG_ARM64_USER_VA_BITS_52))
 		return false;
 
 	return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS));
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index d5219f2624b7..41c808d9168a 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -80,7 +80,7 @@
 #define PGDIR_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
 #define PGDIR_SIZE		(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK		(~(PGDIR_SIZE-1))
-#ifdef CONFIG_ARM64_52BIT_VA
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
 #define PTRS_PER_PGD		(1 << (52 - PGDIR_SHIFT))
 #else
 #define PTRS_PER_PGD		(1 << (VA_BITS - PGDIR_SHIFT))
@@ -310,7 +310,7 @@
 #define TTBR_BADDR_MASK_52	(((UL(1) << 46) - 1) << 2)
 #endif
 
-#ifdef CONFIG_ARM64_52BIT_VA
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
 /* Must be at least 64-byte aligned to prevent corruption of the TTBR */
 #define TTBR1_BADDR_4852_OFFSET	(((UL(1) << (52 - PGDIR_SHIFT)) - \
 				 (UL(1) << (48 - PGDIR_SHIFT))) * 8)
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index efa0210cf927..538ecbc15067 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -20,11 +20,11 @@
 #define __ASM_PROCESSOR_H
 
 #define KERNEL_DS		UL(-1)
-#ifdef CONFIG_ARM64_52BIT_VA
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
 #define USER_DS			((UL(1) << 52) - 1)
 #else
 #define USER_DS			((UL(1) << VA_BITS) - 1)
-#endif /* CONFIG_ARM64_52BIT_VA */
+#endif /* CONFIG_ARM64_USER_VA_BITS_52 */
 
 /*
  * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index c229d9cfe9bf..6b70dd625f01 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -318,7 +318,7 @@ __create_page_tables:
 	adrp	x0, idmap_pg_dir
 	adrp	x3, __idmap_text_start		// __pa(__idmap_text_start)
 
-#ifdef CONFIG_ARM64_52BIT_VA
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
 	mrs_s	x6, SYS_ID_AA64MMFR2_EL1
 	and	x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
 	mov	x5, #52
@@ -800,7 +800,7 @@ ENTRY(__enable_mmu)
 ENDPROC(__enable_mmu)
 
 ENTRY(__cpu_secondary_check52bitva)
-#ifdef CONFIG_ARM64_52BIT_VA
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
 	ldr_l	x0, vabits_user
 	cmp	x0, #52
 	b.ne	2f
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index e15b0b64d4d0..1ff18f5fbecb 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -139,7 +139,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 		if (!cpu_online(cpu)) {
 			pr_crit("CPU%u: failed to come online\n", cpu);
 
-			if (IS_ENABLED(CONFIG_ARM64_52BIT_VA) && va52mismatch)
+			if (IS_ENABLED(CONFIG_ARM64_USER_VA_BITS_52) && va52mismatch)
 				pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
 
 			ret = -EIO;
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 0cf86b17714c..e05b3ce1db6b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -451,8 +451,8 @@ ENTRY(__cpu_setup)
 			TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
 			TCR_TBI0 | TCR_A1
 
-#ifdef CONFIG_ARM64_52BIT_VA
-	ldr_l 		x9, vabits_user
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+	ldr_l		x9, vabits_user
 	sub		x9, xzr, x9
 	add		x9, x9, #64
 #else
-- 
cgit 


From 66f16a24512fa44680504effe908df8326885594 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 10 Dec 2018 14:21:13 +0000
Subject: arm64: smp: Rework early feature mismatched detection

Rather than add additional variables to detect specific early feature
mismatches with secondary CPUs, we can instead dedicate the upper bits
of the CPU boot status word to flag specific mismatches.

This allows us to communicate both granule and VA-size mismatches back
to the primary CPU without the need for additional book-keeping.

Tested-by: Steve Capper <steve.capper@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/smp.h | 15 ++++++++++-----
 arch/arm64/kernel/head.S     | 12 ++++--------
 arch/arm64/kernel/smp.c      | 11 +++++------
 3 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index f82b447bd34f..1895561839a9 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -17,15 +17,20 @@
 #define __ASM_SMP_H
 
 /* Values for secondary_data.status */
+#define CPU_STUCK_REASON_SHIFT		(8)
+#define CPU_BOOT_STATUS_MASK		((1U << CPU_STUCK_REASON_SHIFT) - 1)
 
-#define CPU_MMU_OFF		(-1)
-#define CPU_BOOT_SUCCESS	(0)
+#define CPU_MMU_OFF			(-1)
+#define CPU_BOOT_SUCCESS		(0)
 /* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
-#define CPU_KILL_ME		(1)
+#define CPU_KILL_ME			(1)
 /* The cpu couldn't die gracefully and is looping in the kernel */
-#define CPU_STUCK_IN_KERNEL	(2)
+#define CPU_STUCK_IN_KERNEL		(2)
 /* Fatal system error detected by secondary CPU, crash the system */
-#define CPU_PANIC_KERNEL	(3)
+#define CPU_PANIC_KERNEL		(3)
+
+#define CPU_STUCK_REASON_52_BIT_VA	(1U << CPU_STUCK_REASON_SHIFT)
+#define CPU_STUCK_REASON_NO_GRAN	(2U << CPU_STUCK_REASON_SHIFT)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 6b70dd625f01..eaa68ce6a06d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -809,13 +809,8 @@ ENTRY(__cpu_secondary_check52bitva)
 	and	x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
 	cbnz	x0, 2f
 
-	adr_l	x0, va52mismatch
-	mov	w1, #1
-	strb	w1, [x0]
-	dmb	sy
-	dc	ivac, x0	// Invalidate potentially stale cache line
-
-	update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x0, x1
+	update_early_cpu_boot_status \
+		CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1
 1:	wfe
 	wfi
 	b	1b
@@ -826,7 +821,8 @@ ENDPROC(__cpu_secondary_check52bitva)
 
 __no_granule_support:
 	/* Indicate that this CPU can't boot and is stuck in the kernel */
-	update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2
+	update_early_cpu_boot_status \
+		CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_NO_GRAN, x1, x2
 1:
 	wfe
 	wfi
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 1ff18f5fbecb..4e3bfbde829a 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -108,7 +108,6 @@ static int boot_secondary(unsigned int cpu, struct task_struct *idle)
 }
 
 static DECLARE_COMPLETION(cpu_running);
-bool va52mismatch __ro_after_init;
 
 int __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
@@ -138,10 +137,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 
 		if (!cpu_online(cpu)) {
 			pr_crit("CPU%u: failed to come online\n", cpu);
-
-			if (IS_ENABLED(CONFIG_ARM64_USER_VA_BITS_52) && va52mismatch)
-				pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
-
 			ret = -EIO;
 		}
 	} else {
@@ -156,7 +151,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 		if (status == CPU_MMU_OFF)
 			status = READ_ONCE(__early_cpu_boot_status);
 
-		switch (status) {
+		switch (status & CPU_BOOT_STATUS_MASK) {
 		default:
 			pr_err("CPU%u: failed in unknown state : 0x%lx\n",
 					cpu, status);
@@ -170,6 +165,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 			pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
 		case CPU_STUCK_IN_KERNEL:
 			pr_crit("CPU%u: is stuck in kernel\n", cpu);
+			if (status & CPU_STUCK_REASON_52_BIT_VA)
+				pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
+			if (status & CPU_STUCK_REASON_NO_GRAN)
+				pr_crit("CPU%u: does not support %luK granule \n", cpu, PAGE_SIZE / SZ_1K);
 			cpus_stuck_in_kernel++;
 			break;
 		case CPU_PANIC_KERNEL:
-- 
cgit 


From 7faa313f05cad184e8b17750f0cbe5216ac6debb Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 11 Dec 2018 13:41:32 +0000
Subject: arm64: preempt: Fix big-endian when checking preempt count in
 assembly

Commit 396244692232 ("arm64: preempt: Provide our own implementation of
asm/preempt.h") extended the preempt count field in struct thread_info
to 64 bits, so that it consists of a 32-bit count plus a 32-bit flag
indicating whether or not the current task needs rescheduling.

Whilst the asm-offsets definition of TSK_TI_PREEMPT was updated to point
to this new field, the assembly usage was left untouched meaning that a
32-bit load from TSK_TI_PREEMPT on a big-endian machine actually returns
the reschedule flag instead of the count.

Whilst we could fix this by pointing TSK_TI_PREEMPT at the count field,
we're actually better off reworking the two assembly users so that they
operate on the whole 64-bit value in favour of inspecting the thread
flags separately in order to determine whether a reschedule is needed.

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reported-by: "kernelci.org bot" <bot@kernelci.org>
Tested-by: Kevin Hilman <khilman@baylibre.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/assembler.h | 8 +++-----
 arch/arm64/kernel/entry.S          | 6 ++----
 2 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index ce985f13dce5..4feb6119c3c9 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -722,11 +722,9 @@ USER(\label, ic	ivau, \tmp2)			// invalidate I line PoU
 	.macro		if_will_cond_yield_neon
 #ifdef CONFIG_PREEMPT
 	get_thread_info	x0
-	ldr		w1, [x0, #TSK_TI_PREEMPT]
-	ldr		x0, [x0, #TSK_TI_FLAGS]
-	cmp		w1, #PREEMPT_DISABLE_OFFSET
-	csel		x0, x0, xzr, eq
-	tbnz		x0, #TIF_NEED_RESCHED, .Lyield_\@	// needs rescheduling?
+	ldr		x0, [x0, #TSK_TI_PREEMPT]
+	sub		x0, x0, #PREEMPT_DISABLE_OFFSET
+	cbz		x0, .Lyield_\@
 	/* fall through to endif_yield_neon */
 	.subsection	1
 .Lyield_\@ :
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c7d050207e53..763f03dc4d9e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -619,10 +619,8 @@ el1_irq:
 	irq_handler
 
 #ifdef CONFIG_PREEMPT
-	ldr	w24, [tsk, #TSK_TI_PREEMPT]	// get preempt count
-	cbnz	w24, 1f				// preempt count != 0
-	ldr	x0, [tsk, #TSK_TI_FLAGS]	// get flags
-	tbz	x0, #TIF_NEED_RESCHED, 1f	// needs rescheduling?
+	ldr	x24, [tsk, #TSK_TI_PREEMPT]	// get preempt count
+	cbnz	x24, 1f				// preempt count != 0
 	bl	el1_preempt
 1:
 #endif
-- 
cgit 


From 9b31cf493ffa40914e02998381993116e574c651 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 12 Dec 2018 11:51:40 +0000
Subject: arm64: mm: Introduce MAX_USER_VA_BITS definition

With the introduction of 52-bit virtual addressing for userspace, we are
now in a position where the virtual addressing capability of userspace
may exceed that of the kernel. Consequently, the VA_BITS definition
cannot be used blindly, since it reflects only the size of kernel
virtual addresses.

This patch introduces MAX_USER_VA_BITS which is either VA_BITS or 52
depending on whether 52-bit virtual addressing has been configured at
build time, removing a few places where the 52 is open-coded based on
explicit CONFIG_ guards.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/memory.h        | 6 ++++++
 arch/arm64/include/asm/pgtable-hwdef.h | 6 +-----
 arch/arm64/include/asm/processor.h     | 6 +-----
 3 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 56562ff01076..6747a3eddeb1 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -73,6 +73,12 @@
 #define KERNEL_START      _text
 #define KERNEL_END        _end
 
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+#define MAX_USER_VA_BITS	52
+#else
+#define MAX_USER_VA_BITS	VA_BITS
+#endif
+
 /*
  * KASAN requires 1/8th of the kernel virtual address space for the shadow
  * region. KASAN can bloat the stack significantly, so double the (minimum)
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 2a700f7b12d2..54a37660b8c9 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -80,11 +80,7 @@
 #define PGDIR_SHIFT		ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
 #define PGDIR_SIZE		(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK		(~(PGDIR_SIZE-1))
-#ifdef CONFIG_ARM64_USER_VA_BITS_52
-#define PTRS_PER_PGD		(1 << (52 - PGDIR_SHIFT))
-#else
-#define PTRS_PER_PGD		(1 << (VA_BITS - PGDIR_SHIFT))
-#endif
+#define PTRS_PER_PGD		(1 << (MAX_USER_VA_BITS - PGDIR_SHIFT))
 
 /*
  * Section address mask and size definitions.
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 538ecbc15067..bbecc6fe3e5b 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -20,11 +20,7 @@
 #define __ASM_PROCESSOR_H
 
 #define KERNEL_DS		UL(-1)
-#ifdef CONFIG_ARM64_USER_VA_BITS_52
-#define USER_DS			((UL(1) << 52) - 1)
-#else
-#define USER_DS			((UL(1) << VA_BITS) - 1)
-#endif /* CONFIG_ARM64_USER_VA_BITS_52 */
+#define USER_DS			((UL(1) << MAX_USER_VA_BITS) - 1)
 
 /*
  * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
-- 
cgit 


From c3296a1391cb0ffbea95ee2249af63cb45e4899b Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 12 Dec 2018 12:22:19 +0000
Subject: arm64: add <asm/asm-prototypes.h>

While we can export symbols from assembly files, CONFIG_MODVERIONS requires C
declarations of anyhting that's exported.

Let's account for this as other architectures do by placing these declarations
in <asm/asm-prototypes.h>, which kbuild will automatically use to generate
modversion information for assembly files.

Since we already define most prototypes in existing headers, we simply need to
include those headers in <asm/asm-prototypes.h>, and don't need to duplicate
these.

Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/asm-prototypes.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 arch/arm64/include/asm/asm-prototypes.h

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/asm-prototypes.h b/arch/arm64/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..2173ad32d550
--- /dev/null
+++ b/arch/arm64/include/asm/asm-prototypes.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PROTOTYPES_H
+#define __ASM_PROTOTYPES_H
+/*
+ * CONFIG_MODEVERIONS requires a C declaration to generate the appropriate CRC
+ * for each symbol. Since commit:
+ *
+ *   4efca4ed05cbdfd1 ("kbuild: modversions for EXPORT_SYMBOL() for asm")
+ *
+ * ... kbuild will automatically pick these up from <asm/asm-prototypes.h> and
+ * feed this to genksyms when building assembly files.
+ */
+#include <linux/arm-smccc.h>
+
+#include <asm/ftrace.h>
+#include <asm/page.h>
+#include <asm/string.h>
+#include <asm/uaccess.h>
+
+#include <asm-generic/asm-prototypes.h>
+
+long long __ashlti3(long long a, int b);
+long long __ashrti3(long long a, int b);
+long long __lshrti3(long long a, int b);
+
+#endif /* __ASM_PROTOTYPES_H */
-- 
cgit 


From 6e4ede698d1c17102e42c6f734f7d8cf60df28d5 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 12 Dec 2018 14:17:20 +0000
Subject: arm64: percpu: Fix LSE implementation of value-returning pcpu atomics

Commit 959bf2fd03b5 ("arm64: percpu: Rewrite per-cpu ops to allow use of
LSE atomics") introduced alternative code sequences for the arm64 percpu
atomics, so that the LSE instructions can be patched in at runtime if
they are supported by the CPU.

Unfortunately, when patching in the LSE sequence for a value-returning
pcpu atomic, the argument registers are the wrong way round. The
implementation of this_cpu_add_return() therefore ends up adding
uninitialised stack to the percpu variable and returning garbage.

As it turns out, there aren't very many users of the value-returning
percpu atomics in mainline and we only spotted this due to a failure in
the kprobes selftests. In this case, when attempting to single-step over
the out-of-line instruction slot, the debug monitors would not be
enabled because calling this_cpu_inc_return() on the kernel debug
monitor refcount would fail to detect the transition from 0. We would
consequently execute past the slot and take an undefined instruction
exception from the kernel, resulting in a BUG:

 | kernel BUG at arch/arm64/kernel/traps.c:421!
 | PREEMPT SMP
 | pc : do_undefinstr+0x268/0x278
 | lr : do_undefinstr+0x124/0x278
 | Process swapper/0 (pid: 1, stack limit = 0x(____ptrval____))
 | Call trace:
 |  do_undefinstr+0x268/0x278
 |  el1_undef+0x10/0x78
 |  0xffff00000803c004
 |  init_kprobes+0x150/0x180
 |  do_one_initcall+0x74/0x178
 |  kernel_init_freeable+0x188/0x224
 |  kernel_init+0x10/0x100
 |  ret_from_fork+0x10/0x1c

Fix the argument order to get the value-returning pcpu atomics working
correctly when implemented using the LSE instructions.

Reported-by: Catalin Marinas <catalin.marinas@arm.com>
Tested-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/percpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index f7b1bbbb6f12..6b81dd8cee01 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -94,7 +94,7 @@ __percpu_##name##_return_case_##sz(void *ptr, unsigned long val)	\
 	"	stxr" #sfx "\t%w[loop], %" #w "[ret], %[ptr]\n"		\
 	"	cbnz	%w[loop], 1b",					\
 	/* LSE atomics */						\
-		#op_lse "\t%" #w "[ret], %" #w "[val], %[ptr]\n"	\
+		#op_lse "\t%" #w "[val], %" #w "[ret], %[ptr]\n"	\
 		#op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n"	\
 		__nops(2))						\
 	: [loop] "=&r" (loop), [ret] "=&r" (ret),			\
-- 
cgit 


From 0a1213fa7432778b71a1c0166bf56660a3aab030 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Wed, 12 Dec 2018 13:08:44 +0100
Subject: arm64: enable per-task stack canaries

This enables the use of per-task stack canary values if GCC has
support for emitting the stack canary reference relative to the
value of sp_el0, which holds the task struct pointer in the arm64
kernel.

The $(eval) extends KBUILD_CFLAGS at the moment the make rule is
applied, which means asm-offsets.o (which we rely on for the offset
value) is built without the arguments, and everything built afterwards
has the options set.

Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/Kconfig                      |  7 +++++++
 arch/arm64/Makefile                     | 10 ++++++++++
 arch/arm64/include/asm/stackprotector.h |  3 ++-
 arch/arm64/kernel/asm-offsets.c         |  3 +++
 arch/arm64/kernel/process.c             |  2 +-
 5 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e9af113c572f..0b3aa2a894a7 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1344,6 +1344,13 @@ config RANDOMIZE_MODULE_REGION_FULL
 	  a limited range that contains the [_stext, _etext] interval of the
 	  core kernel, so branch relocations are always in range.
 
+config CC_HAVE_STACKPROTECTOR_SYSREG
+	def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0)
+
+config STACKPROTECTOR_PER_TASK
+	def_bool y
+	depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_SYSREG
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 8978f60779c4..398bdb81a900 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -56,6 +56,16 @@ KBUILD_AFLAGS	+= $(lseinstr) $(brokengasinst)
 KBUILD_CFLAGS	+= $(call cc-option,-mabi=lp64)
 KBUILD_AFLAGS	+= $(call cc-option,-mabi=lp64)
 
+ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
+prepare: stack_protector_prepare
+stack_protector_prepare: prepare0
+	$(eval KBUILD_CFLAGS += -mstack-protector-guard=sysreg		  \
+				-mstack-protector-guard-reg=sp_el0	  \
+				-mstack-protector-guard-offset=$(shell	  \
+			awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}' \
+					include/generated/asm-offsets.h))
+endif
+
 ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
 KBUILD_CPPFLAGS	+= -mbig-endian
 CHECKFLAGS	+= -D__AARCH64EB__
diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h
index 58d15be11c4d..5884a2b02827 100644
--- a/arch/arm64/include/asm/stackprotector.h
+++ b/arch/arm64/include/asm/stackprotector.h
@@ -34,7 +34,8 @@ static __always_inline void boot_init_stack_canary(void)
 	canary &= CANARY_MASK;
 
 	current->stack_canary = canary;
-	__stack_chk_guard = current->stack_canary;
+	if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK))
+		__stack_chk_guard = current->stack_canary;
 }
 
 #endif	/* _ASM_STACKPROTECTOR_H */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 323aeb5f2fe6..65b8afc84466 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -46,6 +46,9 @@ int main(void)
   DEFINE(TSK_TI_TTBR0,		offsetof(struct task_struct, thread_info.ttbr0));
 #endif
   DEFINE(TSK_STACK,		offsetof(struct task_struct, stack));
+#ifdef CONFIG_STACKPROTECTOR
+  DEFINE(TSK_STACK_CANARY,	offsetof(struct task_struct, stack_canary));
+#endif
   BLANK();
   DEFINE(THREAD_CPU_CONTEXT,	offsetof(struct task_struct, thread.cpu_context));
   BLANK();
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index d9a4c2d6dd8b..8a2d68f04e0d 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -59,7 +59,7 @@
 #include <asm/processor.h>
 #include <asm/stacktrace.h>
 
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
 #include <linux/stackprotector.h>
 unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
-- 
cgit 


From 26a25c841d9e6ba4ceba4b8d7ce3f3227f7510ce Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 13 Dec 2018 15:34:44 +0000
Subject: arm64: perf: Treat EXCLUDE_EL* bit definitions as unsigned

Although the upper 32 bits of the PMEVTYPER<n>_EL0 registers are RES0,
we should treat the EXCLUDE_EL* bit definitions as unsigned so that we
avoid accidentally sign-extending the privilege filtering bit (bit 31)
into the upper half of the register.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/perf_event.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h
index c63e5e4fdccd..c593761ba61c 100644
--- a/arch/arm64/include/asm/perf_event.h
+++ b/arch/arm64/include/asm/perf_event.h
@@ -206,9 +206,9 @@
 /*
  * Event filters for PMUv3
  */
-#define	ARMV8_PMU_EXCLUDE_EL1	(1 << 31)
-#define	ARMV8_PMU_EXCLUDE_EL0	(1 << 30)
-#define	ARMV8_PMU_INCLUDE_EL2	(1 << 27)
+#define	ARMV8_PMU_EXCLUDE_EL1	(1U << 31)
+#define	ARMV8_PMU_EXCLUDE_EL0	(1U << 30)
+#define	ARMV8_PMU_INCLUDE_EL2	(1U << 27)
 
 /*
  * PMUSERENR: user enable reg
-- 
cgit 


From 15560657358539aafcac027445f1f34c29a889a6 Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Fri, 7 Dec 2018 18:39:19 +0000
Subject: arm64: add comments about EC exception levels

To make it clear which exceptions can't be taken to EL1 or EL2, add
comments next to the ESR_ELx_EC_* macro definitions.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/esr.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 676de2ec1762..23602a0083ad 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -29,23 +29,23 @@
 #define ESR_ELx_EC_CP14_MR	(0x05)
 #define ESR_ELx_EC_CP14_LS	(0x06)
 #define ESR_ELx_EC_FP_ASIMD	(0x07)
-#define ESR_ELx_EC_CP10_ID	(0x08)
+#define ESR_ELx_EC_CP10_ID	(0x08)	/* EL2 only */
 /* Unallocated EC: 0x09 - 0x0B */
 #define ESR_ELx_EC_CP14_64	(0x0C)
 /* Unallocated EC: 0x0d */
 #define ESR_ELx_EC_ILL		(0x0E)
 /* Unallocated EC: 0x0F - 0x10 */
 #define ESR_ELx_EC_SVC32	(0x11)
-#define ESR_ELx_EC_HVC32	(0x12)
-#define ESR_ELx_EC_SMC32	(0x13)
+#define ESR_ELx_EC_HVC32	(0x12)	/* EL2 only */
+#define ESR_ELx_EC_SMC32	(0x13)	/* EL2 and above */
 /* Unallocated EC: 0x14 */
 #define ESR_ELx_EC_SVC64	(0x15)
-#define ESR_ELx_EC_HVC64	(0x16)
-#define ESR_ELx_EC_SMC64	(0x17)
+#define ESR_ELx_EC_HVC64	(0x16)	/* EL2 and above */
+#define ESR_ELx_EC_SMC64	(0x17)	/* EL2 and above */
 #define ESR_ELx_EC_SYS64	(0x18)
 #define ESR_ELx_EC_SVE		(0x19)
 /* Unallocated EC: 0x1A - 0x1E */
-#define ESR_ELx_EC_IMP_DEF	(0x1f)
+#define ESR_ELx_EC_IMP_DEF	(0x1f)	/* EL3 only */
 #define ESR_ELx_EC_IABT_LOW	(0x20)
 #define ESR_ELx_EC_IABT_CUR	(0x21)
 #define ESR_ELx_EC_PC_ALIGN	(0x22)
@@ -68,7 +68,7 @@
 /* Unallocated EC: 0x36 - 0x37 */
 #define ESR_ELx_EC_BKPT32	(0x38)
 /* Unallocated EC: 0x39 */
-#define ESR_ELx_EC_VECTOR32	(0x3A)
+#define ESR_ELx_EC_VECTOR32	(0x3A)	/* EL2 only */
 /* Unallocted EC: 0x3B */
 #define ESR_ELx_EC_BRK64	(0x3C)
 /* Unallocated EC: 0x3D - 0x3F */
-- 
cgit 


From aa6eece8ec5095e479a354365f5e5102bfab4f5a Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 7 Dec 2018 18:39:20 +0000
Subject: arm64: add pointer authentication register bits

The ARMv8.3 pointer authentication extension adds:

* New fields in ID_AA64ISAR1 to report the presence of pointer
  authentication functionality.

* New control bits in SCTLR_ELx to enable this functionality.

* New system registers to hold the keys necessary for this
  functionality.

* A new ESR_ELx.EC code used when the new instructions are affected by
  configurable traps

This patch adds the relevant definitions to <asm/sysreg.h> and
<asm/esr.h> for these, to be used by subsequent patches.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/esr.h    |  3 ++-
 arch/arm64/include/asm/sysreg.h | 30 ++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 23602a0083ad..52233f00d53d 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -30,7 +30,8 @@
 #define ESR_ELx_EC_CP14_LS	(0x06)
 #define ESR_ELx_EC_FP_ASIMD	(0x07)
 #define ESR_ELx_EC_CP10_ID	(0x08)	/* EL2 only */
-/* Unallocated EC: 0x09 - 0x0B */
+#define ESR_ELx_EC_PAC		(0x09)	/* EL2 and above */
+/* Unallocated EC: 0x0A - 0x0B */
 #define ESR_ELx_EC_CP14_64	(0x0C)
 /* Unallocated EC: 0x0d */
 #define ESR_ELx_EC_ILL		(0x0E)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 78f15858535e..bb15d09daf56 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -188,6 +188,19 @@
 #define SYS_TTBR1_EL1			sys_reg(3, 0, 2, 0, 1)
 #define SYS_TCR_EL1			sys_reg(3, 0, 2, 0, 2)
 
+#define SYS_APIAKEYLO_EL1		sys_reg(3, 0, 2, 1, 0)
+#define SYS_APIAKEYHI_EL1		sys_reg(3, 0, 2, 1, 1)
+#define SYS_APIBKEYLO_EL1		sys_reg(3, 0, 2, 1, 2)
+#define SYS_APIBKEYHI_EL1		sys_reg(3, 0, 2, 1, 3)
+
+#define SYS_APDAKEYLO_EL1		sys_reg(3, 0, 2, 2, 0)
+#define SYS_APDAKEYHI_EL1		sys_reg(3, 0, 2, 2, 1)
+#define SYS_APDBKEYLO_EL1		sys_reg(3, 0, 2, 2, 2)
+#define SYS_APDBKEYHI_EL1		sys_reg(3, 0, 2, 2, 3)
+
+#define SYS_APGAKEYLO_EL1		sys_reg(3, 0, 2, 3, 0)
+#define SYS_APGAKEYHI_EL1		sys_reg(3, 0, 2, 3, 1)
+
 #define SYS_ICC_PMR_EL1			sys_reg(3, 0, 4, 6, 0)
 
 #define SYS_AFSR0_EL1			sys_reg(3, 0, 5, 1, 0)
@@ -437,9 +450,13 @@
 
 /* Common SCTLR_ELx flags. */
 #define SCTLR_ELx_DSSBS	(1UL << 44)
+#define SCTLR_ELx_ENIA	(1U << 31)
+#define SCTLR_ELx_ENIB	(1 << 30)
+#define SCTLR_ELx_ENDA	(1 << 27)
 #define SCTLR_ELx_EE    (1 << 25)
 #define SCTLR_ELx_IESB	(1 << 21)
 #define SCTLR_ELx_WXN	(1 << 19)
+#define SCTLR_ELx_ENDB	(1 << 13)
 #define SCTLR_ELx_I	(1 << 12)
 #define SCTLR_ELx_SA	(1 << 3)
 #define SCTLR_ELx_C	(1 << 2)
@@ -534,11 +551,24 @@
 
 /* id_aa64isar1 */
 #define ID_AA64ISAR1_SB_SHIFT		36
+#define ID_AA64ISAR1_GPI_SHIFT		28
+#define ID_AA64ISAR1_GPA_SHIFT		24
 #define ID_AA64ISAR1_LRCPC_SHIFT	20
 #define ID_AA64ISAR1_FCMA_SHIFT		16
 #define ID_AA64ISAR1_JSCVT_SHIFT	12
+#define ID_AA64ISAR1_API_SHIFT		8
+#define ID_AA64ISAR1_APA_SHIFT		4
 #define ID_AA64ISAR1_DPB_SHIFT		0
 
+#define ID_AA64ISAR1_APA_NI		0x0
+#define ID_AA64ISAR1_APA_ARCHITECTED	0x1
+#define ID_AA64ISAR1_API_NI		0x0
+#define ID_AA64ISAR1_API_IMP_DEF	0x1
+#define ID_AA64ISAR1_GPA_NI		0x0
+#define ID_AA64ISAR1_GPA_ARCHITECTED	0x1
+#define ID_AA64ISAR1_GPI_NI		0x0
+#define ID_AA64ISAR1_GPI_IMP_DEF	0x1
+
 /* id_aa64pfr0 */
 #define ID_AA64PFR0_CSV3_SHIFT		60
 #define ID_AA64PFR0_CSV2_SHIFT		56
-- 
cgit 


From 4eaed6aa2c628101246bcabc91b203bfac1193f8 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 7 Dec 2018 18:39:21 +0000
Subject: arm64/kvm: consistently handle host HCR_EL2 flags

In KVM we define the configuration of HCR_EL2 for a VHE HOST in
HCR_HOST_VHE_FLAGS, but we don't have a similar definition for the
non-VHE host flags, and open-code HCR_RW. Further, in head.S we
open-code the flags for VHE and non-VHE configurations.

In future, we're going to want to configure more flags for the host, so
lets add a HCR_HOST_NVHE_FLAGS defintion, and consistently use both
HCR_HOST_VHE_FLAGS and HCR_HOST_NVHE_FLAGS in the kvm code and head.S.

We now use mov_q to generate the HCR_EL2 value, as we use when
configuring other registers in head.S.

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: kvmarm@lists.cs.columbia.edu
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/kvm_arm.h | 1 +
 arch/arm64/kernel/head.S         | 5 ++---
 arch/arm64/kvm/hyp/switch.c      | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 6f602af5263c..c8825c5a8dd0 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -87,6 +87,7 @@
 			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
 			 HCR_FMO | HCR_IMO)
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
+#define HCR_HOST_NVHE_FLAGS (HCR_RW)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
 /* TCR_EL2 Registers bits */
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 6d6e3bca68a9..c7213674cb24 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -510,10 +510,9 @@ ENTRY(el2_setup)
 #endif
 
 	/* Hyp configuration. */
-	mov	x0, #HCR_RW			// 64-bit EL1
+	mov_q	x0, HCR_HOST_NVHE_FLAGS
 	cbz	x2, set_hcr
-	orr	x0, x0, #HCR_TGE		// Enable Host Extensions
-	orr	x0, x0, #HCR_E2H
+	mov_q	x0, HCR_HOST_VHE_FLAGS
 set_hcr:
 	msr	hcr_el2, x0
 	isb
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 31ee0bfc432f..63ac10ead3a8 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -165,7 +165,7 @@ static void __hyp_text __deactivate_traps_nvhe(void)
 	mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
 
 	write_sysreg(mdcr_el2, mdcr_el2);
-	write_sysreg(HCR_RW, hcr_el2);
+	write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
 	write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
 }
 
-- 
cgit 


From b3669b1e1c09890d61109a1a8ece2c5b66804714 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 7 Dec 2018 18:39:23 +0000
Subject: arm64: Don't trap host pointer auth use to EL2

To allow EL0 (and/or EL1) to use pointer authentication functionality,
we must ensure that pointer authentication instructions and accesses to
pointer authentication keys are not trapped to EL2.

This patch ensures that HCR_EL2 is configured appropriately when the
kernel is booted at EL2. For non-VHE kernels we set HCR_EL2.{API,APK},
ensuring that EL1 can access keys and permit EL0 use of instructions.
For VHE kernels host EL0 (TGE && E2H) is unaffected by these settings,
and it doesn't matter how we configure HCR_EL2.{API,APK}, so we don't
bother setting them.

This does not enable support for KVM guests, since KVM manages HCR_EL2
itself when running VMs.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Acked-by: Christoffer Dall <christoffer.dall@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: kvmarm@lists.cs.columbia.edu
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/kvm_arm.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index c8825c5a8dd0..f9123fe8fcf3 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -24,6 +24,8 @@
 
 /* Hyp Configuration Register (HCR) bits */
 #define HCR_FWB		(UL(1) << 46)
+#define HCR_API		(UL(1) << 41)
+#define HCR_APK		(UL(1) << 40)
 #define HCR_TEA		(UL(1) << 37)
 #define HCR_TERR	(UL(1) << 36)
 #define HCR_TLOR	(UL(1) << 35)
@@ -87,7 +89,7 @@
 			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
 			 HCR_FMO | HCR_IMO)
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
-#define HCR_HOST_NVHE_FLAGS (HCR_RW)
+#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK)
 #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
 
 /* TCR_EL2 Registers bits */
-- 
cgit 


From 6984eb47d5c1a74bb44467ee4eee22d680f10785 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 7 Dec 2018 18:39:24 +0000
Subject: arm64/cpufeature: detect pointer authentication

So that we can dynamically handle the presence of pointer authentication
functionality, wire up probing code in cpufeature.c.

From ARMv8.3 onwards, ID_AA64ISAR1 is no longer entirely RES0, and now
has four fields describing the presence of pointer authentication
functionality:

* APA - address authentication present, using an architected algorithm
* API - address authentication present, using an IMP DEF algorithm
* GPA - generic authentication present, using an architected algorithm
* GPI - generic authentication present, using an IMP DEF algorithm

This patch checks for both address and generic authentication,
separately. It is assumed that if all CPUs support an IMP DEF algorithm,
the same algorithm is used across all CPUs.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cpucaps.h    |  8 +++-
 arch/arm64/include/asm/cpufeature.h | 12 +++++
 arch/arm64/kernel/cpufeature.c      | 90 +++++++++++++++++++++++++++++++++++++
 3 files changed, 109 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index a89f587d4842..803f388e81d4 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -56,7 +56,13 @@
 #define ARM64_WORKAROUND_1188873		35
 #define ARM64_HAS_SB				36
 #define ARM64_WORKAROUND_1165522		37
+#define ARM64_HAS_ADDRESS_AUTH_ARCH		38
+#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF		39
+#define ARM64_HAS_ADDRESS_AUTH			40
+#define ARM64_HAS_GENERIC_AUTH_ARCH		41
+#define ARM64_HAS_GENERIC_AUTH_IMP_DEF		42
+#define ARM64_HAS_GENERIC_AUTH			43
 
-#define ARM64_NCAPS				38
+#define ARM64_NCAPS				44
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 38e674f6e18b..daec4b50d7fb 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -565,6 +565,18 @@ static inline bool system_supports_cnp(void)
 		cpus_have_const_cap(ARM64_HAS_CNP);
 }
 
+static inline bool system_supports_address_auth(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) &&
+		cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH);
+}
+
+static inline bool system_supports_generic_auth(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) &&
+		cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH);
+}
+
 #define ARM64_SSBD_UNKNOWN		-1
 #define ARM64_SSBD_FORCE_DISABLE	0
 #define ARM64_SSBD_KERNEL		1
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 7b6f26fd075f..c82ebd8f0087 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -143,9 +143,17 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 
 static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPI_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_API_SHIFT, 4, 0),
+	ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
+		       FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_APA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0),
 	ARM64_FTR_END,
 };
@@ -1182,6 +1190,36 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
 }
 #endif /* CONFIG_ARM64_RAS_EXTN */
 
+#ifdef CONFIG_ARM64_PTR_AUTH
+static bool has_address_auth(const struct arm64_cpu_capabilities *entry,
+			     int __unused)
+{
+	u64 isar1 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+	bool api, apa;
+
+	apa = cpuid_feature_extract_unsigned_field(isar1,
+					ID_AA64ISAR1_APA_SHIFT) > 0;
+	api = cpuid_feature_extract_unsigned_field(isar1,
+					ID_AA64ISAR1_API_SHIFT) > 0;
+
+	return apa || api;
+}
+
+static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
+			     int __unused)
+{
+	u64 isar1 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+	bool gpi, gpa;
+
+	gpa = cpuid_feature_extract_unsigned_field(isar1,
+					ID_AA64ISAR1_GPA_SHIFT) > 0;
+	gpi = cpuid_feature_extract_unsigned_field(isar1,
+					ID_AA64ISAR1_GPI_SHIFT) > 0;
+
+	return gpa || gpi;
+}
+#endif /* CONFIG_ARM64_PTR_AUTH */
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
@@ -1415,6 +1453,58 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.sign = FTR_UNSIGNED,
 		.min_field_value = 1,
 	},
+#ifdef CONFIG_ARM64_PTR_AUTH
+	{
+		.desc = "Address authentication (architected algorithm)",
+		.capability = ARM64_HAS_ADDRESS_AUTH_ARCH,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.sys_reg = SYS_ID_AA64ISAR1_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64ISAR1_APA_SHIFT,
+		.min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
+		.matches = has_cpuid_feature,
+	},
+	{
+		.desc = "Address authentication (IMP DEF algorithm)",
+		.capability = ARM64_HAS_ADDRESS_AUTH_IMP_DEF,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.sys_reg = SYS_ID_AA64ISAR1_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64ISAR1_API_SHIFT,
+		.min_field_value = ID_AA64ISAR1_API_IMP_DEF,
+		.matches = has_cpuid_feature,
+	},
+	{
+		.capability = ARM64_HAS_ADDRESS_AUTH,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_address_auth,
+	},
+	{
+		.desc = "Generic authentication (architected algorithm)",
+		.capability = ARM64_HAS_GENERIC_AUTH_ARCH,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.sys_reg = SYS_ID_AA64ISAR1_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64ISAR1_GPA_SHIFT,
+		.min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED,
+		.matches = has_cpuid_feature,
+	},
+	{
+		.desc = "Generic authentication (IMP DEF algorithm)",
+		.capability = ARM64_HAS_GENERIC_AUTH_IMP_DEF,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.sys_reg = SYS_ID_AA64ISAR1_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64ISAR1_GPI_SHIFT,
+		.min_field_value = ID_AA64ISAR1_GPI_IMP_DEF,
+		.matches = has_cpuid_feature,
+	},
+	{
+		.capability = ARM64_HAS_GENERIC_AUTH,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_generic_auth,
+	},
+#endif /* CONFIG_ARM64_PTR_AUTH */
 	{},
 };
 
-- 
cgit 


From 7503197562567b57ec14feb3a9d5400ebc56812f Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 7 Dec 2018 18:39:25 +0000
Subject: arm64: add basic pointer authentication support

This patch adds basic support for pointer authentication, allowing
userspace to make use of APIAKey, APIBKey, APDAKey, APDBKey, and
APGAKey. The kernel maintains key values for each process (shared by all
threads within), which are initialised to random values at exec() time.

The ID_AA64ISAR1_EL1.{APA,API,GPA,GPI} fields are exposed to userspace,
to describe that pointer authentication instructions are available and
that the kernel is managing the keys. Two new hwcaps are added for the
same reason: PACA (for address authentication) and PACG (for generic
authentication).

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Tested-by: Adam Wallis <awallis@codeaurora.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
[will: Fix sizeof() usage and unroll address key initialisation]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/pointer_auth.h | 79 +++++++++++++++++++++++++++++++++++
 arch/arm64/include/asm/thread_info.h  |  4 ++
 arch/arm64/include/uapi/asm/hwcap.h   |  2 +
 arch/arm64/kernel/cpufeature.c        | 13 ++++++
 arch/arm64/kernel/cpuinfo.c           |  2 +
 arch/arm64/kernel/process.c           |  4 ++
 6 files changed, 104 insertions(+)
 create mode 100644 arch/arm64/include/asm/pointer_auth.h

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
new file mode 100644
index 000000000000..91c4185dda5b
--- /dev/null
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __ASM_POINTER_AUTH_H
+#define __ASM_POINTER_AUTH_H
+
+#include <linux/random.h>
+
+#include <asm/cpufeature.h>
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+/*
+ * Each key is a 128-bit quantity which is split across a pair of 64-bit
+ * registers (Lo and Hi).
+ */
+struct ptrauth_key {
+	unsigned long lo, hi;
+};
+
+/*
+ * We give each process its own keys, which are shared by all threads. The keys
+ * are inherited upon fork(), and reinitialised upon exec*().
+ */
+struct ptrauth_keys {
+	struct ptrauth_key apia;
+	struct ptrauth_key apib;
+	struct ptrauth_key apda;
+	struct ptrauth_key apdb;
+	struct ptrauth_key apga;
+};
+
+static inline void ptrauth_keys_init(struct ptrauth_keys *keys)
+{
+	if (system_supports_address_auth()) {
+		get_random_bytes(&keys->apia, sizeof(keys->apia));
+		get_random_bytes(&keys->apib, sizeof(keys->apib));
+		get_random_bytes(&keys->apda, sizeof(keys->apda));
+		get_random_bytes(&keys->apdb, sizeof(keys->apdb));
+	}
+
+	if (system_supports_generic_auth())
+		get_random_bytes(&keys->apga, sizeof(keys->apga));
+}
+
+#define __ptrauth_key_install(k, v)				\
+do {								\
+	struct ptrauth_key __pki_v = (v);			\
+	write_sysreg_s(__pki_v.lo, SYS_ ## k ## KEYLO_EL1);	\
+	write_sysreg_s(__pki_v.hi, SYS_ ## k ## KEYHI_EL1);	\
+} while (0)
+
+static inline void ptrauth_keys_switch(struct ptrauth_keys *keys)
+{
+	if (system_supports_address_auth()) {
+		__ptrauth_key_install(APIA, keys->apia);
+		__ptrauth_key_install(APIB, keys->apib);
+		__ptrauth_key_install(APDA, keys->apda);
+		__ptrauth_key_install(APDB, keys->apdb);
+	}
+
+	if (system_supports_generic_auth())
+		__ptrauth_key_install(APGA, keys->apga);
+}
+
+#define ptrauth_thread_init_user(tsk)					\
+do {									\
+	struct task_struct *__ptiu_tsk = (tsk);				\
+	ptrauth_keys_init(&__ptiu_tsk->thread_info.keys_user);		\
+	ptrauth_keys_switch(&__ptiu_tsk->thread_info.keys_user);	\
+} while (0)
+
+#define ptrauth_thread_switch(tsk)	\
+	ptrauth_keys_switch(&(tsk)->thread_info.keys_user)
+
+#else /* CONFIG_ARM64_PTR_AUTH */
+#define ptrauth_thread_init_user(tsk)
+#define ptrauth_thread_switch(tsk)
+#endif /* CONFIG_ARM64_PTR_AUTH */
+
+#endif /* __ASM_POINTER_AUTH_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index bbca68b54732..f8f66ad9dd8f 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -28,6 +28,7 @@
 struct task_struct;
 
 #include <asm/memory.h>
+#include <asm/pointer_auth.h>
 #include <asm/stack_pointer.h>
 #include <asm/types.h>
 
@@ -54,6 +55,9 @@ struct thread_info {
 #endif
 		} preempt;
 	};
+#ifdef CONFIG_ARM64_PTR_AUTH
+	struct ptrauth_keys	keys_user;
+#endif
 };
 
 #define thread_saved_pc(tsk)	\
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 7784f7cba16c..5f0750c2199c 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -50,5 +50,7 @@
 #define HWCAP_FLAGM		(1 << 27)
 #define HWCAP_SSBS		(1 << 28)
 #define HWCAP_SB		(1 << 29)
+#define HWCAP_PACA		(1 << 30)
+#define HWCAP_PACG		(1UL << 31)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index c82ebd8f0087..f15000872e08 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1191,6 +1191,12 @@ static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused)
 #endif /* CONFIG_ARM64_RAS_EXTN */
 
 #ifdef CONFIG_ARM64_PTR_AUTH
+static void cpu_enable_address_auth(struct arm64_cpu_capabilities const *cap)
+{
+	sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |
+				       SCTLR_ELx_ENDA | SCTLR_ELx_ENDB);
+}
+
 static bool has_address_auth(const struct arm64_cpu_capabilities *entry,
 			     int __unused)
 {
@@ -1478,6 +1484,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.capability = ARM64_HAS_ADDRESS_AUTH,
 		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
 		.matches = has_address_auth,
+		.cpu_enable = cpu_enable_address_auth,
 	},
 	{
 		.desc = "Generic authentication (architected algorithm)",
@@ -1552,6 +1559,12 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE),
 #endif
 	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS),
+#ifdef CONFIG_ARM64_PTR_AUTH
+	{ .desc = "HWCAP_PACA", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_address_auth,
+		.hwcap_type = CAP_HWCAP, .hwcap = HWCAP_PACA },
+	{ .desc = "HWCAP_PACG", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_generic_auth,
+		.hwcap_type = CAP_HWCAP, .hwcap = HWCAP_PACG },
+#endif
 	{},
 };
 
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 7cb0b08ab0a7..ca0685f33900 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -83,6 +83,8 @@ static const char *const hwcap_str[] = {
 	"flagm",
 	"ssbs",
 	"sb",
+	"paca",
+	"pacg",
 	NULL
 };
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 8a2d68f04e0d..e0a443730e04 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -57,6 +57,7 @@
 #include <asm/fpsimd.h>
 #include <asm/mmu_context.h>
 #include <asm/processor.h>
+#include <asm/pointer_auth.h>
 #include <asm/stacktrace.h>
 
 #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
@@ -429,6 +430,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	contextidr_thread_switch(next);
 	entry_task_switch(next);
 	uao_thread_switch(next);
+	ptrauth_thread_switch(next);
 
 	/*
 	 * Complete any pending TLB or cache maintenance on this CPU in case
@@ -496,4 +498,6 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 void arch_setup_new_exec(void)
 {
 	current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
+
+	ptrauth_thread_init_user(current);
 }
-- 
cgit 


From ec6e822d1a22d0eef1d1fa260dff751dba9a4258 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 7 Dec 2018 18:39:26 +0000
Subject: arm64: expose user PAC bit positions via ptrace

When pointer authentication is in use, data/instruction pointers have a
number of PAC bits inserted into them. The number and position of these
bits depends on the configured TCR_ELx.TxSZ and whether tagging is
enabled. ARMv8.3 allows tagging to differ for instruction and data
pointers.

For userspace debuggers to unwind the stack and/or to follow pointer
chains, they need to be able to remove the PAC bits before attempting to
use a pointer.

This patch adds a new structure with masks describing the location of
the PAC bits in userspace instruction and data pointers (i.e. those
addressable via TTBR0), which userspace can query via PTRACE_GETREGSET.
By clearing these bits from pointers (and replacing them with the value
of bit 55), userspace can acquire the PAC-less versions.

This new regset is exposed when the kernel is built with (user) pointer
authentication support, and the address authentication feature is
enabled. Otherwise, the regset is hidden.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
[will: Fix to use vabits_user instead of VA_BITS and rename macro]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/memory.h       |  3 +++
 arch/arm64/include/asm/pointer_auth.h |  8 ++++++++
 arch/arm64/include/asm/processor.h    |  2 --
 arch/arm64/include/uapi/asm/ptrace.h  |  7 +++++++
 arch/arm64/kernel/ptrace.c            | 38 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/elf.h              |  1 +
 6 files changed, 57 insertions(+), 2 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 6747a3eddeb1..bd749033bc88 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -207,6 +207,9 @@ static inline unsigned long kaslr_offset(void)
 	return kimage_vaddr - KIMAGE_VADDR;
 }
 
+/* the actual size of a user virtual address */
+extern u64			vabits_user;
+
 /*
  * Allow all memory at the discovery stage. We will clip it later.
  */
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index 91c4185dda5b..2a22c03c1540 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -2,9 +2,11 @@
 #ifndef __ASM_POINTER_AUTH_H
 #define __ASM_POINTER_AUTH_H
 
+#include <linux/bitops.h>
 #include <linux/random.h>
 
 #include <asm/cpufeature.h>
+#include <asm/memory.h>
 #include <asm/sysreg.h>
 
 #ifdef CONFIG_ARM64_PTR_AUTH
@@ -61,6 +63,12 @@ static inline void ptrauth_keys_switch(struct ptrauth_keys *keys)
 		__ptrauth_key_install(APGA, keys->apga);
 }
 
+/*
+ * The EL0 pointer bits used by a pointer authentication code.
+ * This is dependent on TBI0 being enabled, or bits 63:56 would also apply.
+ */
+#define ptrauth_user_pac_mask()	GENMASK(54, vabits_user)
+
 #define ptrauth_thread_init_user(tsk)					\
 do {									\
 	struct task_struct *__ptiu_tsk = (tsk);				\
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index bbecc6fe3e5b..f4b8e09aff56 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -53,8 +53,6 @@
  */
 
 #define DEFAULT_MAP_WINDOW_64	(UL(1) << VA_BITS)
-
-extern u64 vabits_user;
 #define TASK_SIZE_64		(UL(1) << vabits_user)
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index a36227fdb084..c2f249bcd829 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -229,6 +229,13 @@ struct user_sve_header {
 		  SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags)	\
 		: SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags))
 
+/* pointer authentication masks (NT_ARM_PAC_MASK) */
+
+struct user_pac_mask {
+	__u64		data_mask;
+	__u64		insn_mask;
+};
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _UAPI__ASM_PTRACE_H */
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 1710a2d01669..9dce33b0e260 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -46,6 +46,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/fpsimd.h>
 #include <asm/pgtable.h>
+#include <asm/pointer_auth.h>
 #include <asm/stacktrace.h>
 #include <asm/syscall.h>
 #include <asm/traps.h>
@@ -956,6 +957,30 @@ out:
 
 #endif /* CONFIG_ARM64_SVE */
 
+#ifdef CONFIG_ARM64_PTR_AUTH
+static int pac_mask_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	/*
+	 * The PAC bits can differ across data and instruction pointers
+	 * depending on TCR_EL1.TBID*, which we may make use of in future, so
+	 * we expose separate masks.
+	 */
+	unsigned long mask = ptrauth_user_pac_mask();
+	struct user_pac_mask uregs = {
+		.data_mask = mask,
+		.insn_mask = mask,
+	};
+
+	if (!system_supports_address_auth())
+		return -EINVAL;
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &uregs, 0, -1);
+}
+#endif /* CONFIG_ARM64_PTR_AUTH */
+
 enum aarch64_regset {
 	REGSET_GPR,
 	REGSET_FPR,
@@ -968,6 +993,9 @@ enum aarch64_regset {
 #ifdef CONFIG_ARM64_SVE
 	REGSET_SVE,
 #endif
+#ifdef CONFIG_ARM64_PTR_AUTH
+	REGSET_PAC_MASK,
+#endif
 };
 
 static const struct user_regset aarch64_regsets[] = {
@@ -1037,6 +1065,16 @@ static const struct user_regset aarch64_regsets[] = {
 		.get_size = sve_get_size,
 	},
 #endif
+#ifdef CONFIG_ARM64_PTR_AUTH
+	[REGSET_PAC_MASK] = {
+		.core_note_type = NT_ARM_PAC_MASK,
+		.n = sizeof(struct user_pac_mask) / sizeof(u64),
+		.size = sizeof(u64),
+		.align = sizeof(u64),
+		.get = pac_mask_get,
+		/* this cannot be set dynamically */
+	},
+#endif
 };
 
 static const struct user_regset_view user_aarch64_view = {
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index c5358e0ae7c5..3f23273d690c 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -420,6 +420,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_HW_WATCH	0x403		/* ARM hardware watchpoint registers */
 #define NT_ARM_SYSTEM_CALL	0x404	/* ARM system call number */
 #define NT_ARM_SVE	0x405		/* ARM Scalable Vector Extension registers */
+#define NT_ARM_PAC_MASK		0x406	/* ARM pointer authentication code masks */
 #define NT_ARC_V2	0x600		/* ARCv2 accumulator/extra registers */
 #define NT_VMCOREDD	0x700		/* Vmcore Device Dump Note */
 #define NT_MIPS_DSP	0x800		/* MIPS DSP ASE registers */
-- 
cgit 


From ccc43810827f9feb5e588e4b7098dc55b1d972f8 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Fri, 7 Dec 2018 18:39:27 +0000
Subject: arm64: perf: strip PAC when unwinding userspace

When the kernel is unwinding userspace callchains, we can't expect that
the userspace consumer of these callchains has the data necessary to
strip the PAC from the stored LR.

This patch has the kernel strip the PAC from user stackframes when the
in-kernel unwinder is used. This only affects the LR value, and not the
FP.

This only affects the in-kernel unwinder. When userspace performs
unwinding, it is up to userspace to strip PACs as necessary (which can
be determined from DWARF information).

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/pointer_auth.h | 7 +++++++
 arch/arm64/kernel/perf_callchain.c    | 6 +++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index 2a22c03c1540..5ccf49b4dac3 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -69,6 +69,12 @@ static inline void ptrauth_keys_switch(struct ptrauth_keys *keys)
  */
 #define ptrauth_user_pac_mask()	GENMASK(54, vabits_user)
 
+/* Only valid for EL0 TTBR0 instruction pointers */
+static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
+{
+	return ptr & ~ptrauth_user_pac_mask();
+}
+
 #define ptrauth_thread_init_user(tsk)					\
 do {									\
 	struct task_struct *__ptiu_tsk = (tsk);				\
@@ -80,6 +86,7 @@ do {									\
 	ptrauth_keys_switch(&(tsk)->thread_info.keys_user)
 
 #else /* CONFIG_ARM64_PTR_AUTH */
+#define ptrauth_strip_insn_pac(lr)	(lr)
 #define ptrauth_thread_init_user(tsk)
 #define ptrauth_thread_switch(tsk)
 #endif /* CONFIG_ARM64_PTR_AUTH */
diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c
index bcafd7dcfe8b..94754f07f67a 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -18,6 +18,7 @@
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
 
+#include <asm/pointer_auth.h>
 #include <asm/stacktrace.h>
 
 struct frame_tail {
@@ -35,6 +36,7 @@ user_backtrace(struct frame_tail __user *tail,
 {
 	struct frame_tail buftail;
 	unsigned long err;
+	unsigned long lr;
 
 	/* Also check accessibility of one struct frame_tail beyond */
 	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
@@ -47,7 +49,9 @@ user_backtrace(struct frame_tail __user *tail,
 	if (err)
 		return NULL;
 
-	perf_callchain_store(entry, buftail.lr);
+	lr = ptrauth_strip_insn_pac(buftail.lr);
+
+	perf_callchain_store(entry, lr);
 
 	/*
 	 * Frame pointers should strictly progress back up the stack
-- 
cgit 


From ba830885656414101b2f8ca88786524d4bb5e8c1 Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Fri, 7 Dec 2018 18:39:28 +0000
Subject: arm64: add prctl control for resetting ptrauth keys

Add an arm64-specific prctl to allow a thread to reinitialize its
pointer authentication keys to random values. This can be useful when
exec() is not used for starting new processes, to ensure that different
processes still have different keys.

Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/pointer_auth.h |  3 +++
 arch/arm64/include/asm/processor.h    |  4 +++
 arch/arm64/kernel/Makefile            |  1 +
 arch/arm64/kernel/pointer_auth.c      | 47 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/prctl.h            |  8 ++++++
 kernel/sys.c                          |  8 ++++++
 6 files changed, 71 insertions(+)
 create mode 100644 arch/arm64/kernel/pointer_auth.c

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index 5ccf49b4dac3..80eb03afd677 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -63,6 +63,8 @@ static inline void ptrauth_keys_switch(struct ptrauth_keys *keys)
 		__ptrauth_key_install(APGA, keys->apga);
 }
 
+extern int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg);
+
 /*
  * The EL0 pointer bits used by a pointer authentication code.
  * This is dependent on TBI0 being enabled, or bits 63:56 would also apply.
@@ -86,6 +88,7 @@ do {									\
 	ptrauth_keys_switch(&(tsk)->thread_info.keys_user)
 
 #else /* CONFIG_ARM64_PTR_AUTH */
+#define ptrauth_prctl_reset_keys(tsk, arg)	(-EINVAL)
 #define ptrauth_strip_insn_pac(lr)	(lr)
 #define ptrauth_thread_init_user(tsk)
 #define ptrauth_thread_switch(tsk)
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index f4b8e09aff56..142c708cb429 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -44,6 +44,7 @@
 #include <asm/hw_breakpoint.h>
 #include <asm/lse.h>
 #include <asm/pgtable-hwdef.h>
+#include <asm/pointer_auth.h>
 #include <asm/ptrace.h>
 #include <asm/types.h>
 
@@ -289,6 +290,9 @@ extern void __init minsigstksz_setup(void);
 #define SVE_SET_VL(arg)	sve_set_current_vl(arg)
 #define SVE_GET_VL()	sve_get_current_vl()
 
+/* PR_PAC_RESET_KEYS prctl */
+#define PAC_RESET_KEYS(tsk, arg)	ptrauth_prctl_reset_keys(tsk, arg)
+
 /*
  * For CONFIG_GCC_PLUGIN_STACKLEAK
  *
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 583334ce1c2c..df08d735b21d 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -58,6 +58,7 @@ arm64-obj-$(CONFIG_CRASH_DUMP)		+= crash_dump.o
 arm64-obj-$(CONFIG_CRASH_CORE)		+= crash_core.o
 arm64-obj-$(CONFIG_ARM_SDE_INTERFACE)	+= sdei.o
 arm64-obj-$(CONFIG_ARM64_SSBD)		+= ssbd.o
+arm64-obj-$(CONFIG_ARM64_PTR_AUTH)	+= pointer_auth.o
 
 obj-y					+= $(arm64-obj-y) vdso/ probes/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
new file mode 100644
index 000000000000..b9f6f5f3409a
--- /dev/null
+++ b/arch/arm64/kernel/pointer_auth.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/errno.h>
+#include <linux/prctl.h>
+#include <linux/random.h>
+#include <linux/sched.h>
+#include <asm/cpufeature.h>
+#include <asm/pointer_auth.h>
+
+int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
+{
+	struct ptrauth_keys *keys = &tsk->thread_info.keys_user;
+	unsigned long addr_key_mask = PR_PAC_APIAKEY | PR_PAC_APIBKEY |
+				      PR_PAC_APDAKEY | PR_PAC_APDBKEY;
+	unsigned long key_mask = addr_key_mask | PR_PAC_APGAKEY;
+
+	if (!system_supports_address_auth() && !system_supports_generic_auth())
+		return -EINVAL;
+
+	if (!arg) {
+		ptrauth_keys_init(keys);
+		ptrauth_keys_switch(keys);
+		return 0;
+	}
+
+	if (arg & ~key_mask)
+		return -EINVAL;
+
+	if (((arg & addr_key_mask) && !system_supports_address_auth()) ||
+	    ((arg & PR_PAC_APGAKEY) && !system_supports_generic_auth()))
+		return -EINVAL;
+
+	if (arg & PR_PAC_APIAKEY)
+		get_random_bytes(&keys->apia, sizeof(keys->apia));
+	if (arg & PR_PAC_APIBKEY)
+		get_random_bytes(&keys->apib, sizeof(keys->apib));
+	if (arg & PR_PAC_APDAKEY)
+		get_random_bytes(&keys->apda, sizeof(keys->apda));
+	if (arg & PR_PAC_APDBKEY)
+		get_random_bytes(&keys->apdb, sizeof(keys->apdb));
+	if (arg & PR_PAC_APGAKEY)
+		get_random_bytes(&keys->apga, sizeof(keys->apga));
+
+	ptrauth_keys_switch(keys);
+
+	return 0;
+}
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index c0d7ea0bf5b6..0f535a501391 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -219,4 +219,12 @@ struct prctl_mm_map {
 # define PR_SPEC_DISABLE		(1UL << 2)
 # define PR_SPEC_FORCE_DISABLE		(1UL << 3)
 
+/* Reset arm64 pointer authentication keys */
+#define PR_PAC_RESET_KEYS		54
+# define PR_PAC_APIAKEY			(1UL << 0)
+# define PR_PAC_APIBKEY			(1UL << 1)
+# define PR_PAC_APDAKEY			(1UL << 2)
+# define PR_PAC_APDBKEY			(1UL << 3)
+# define PR_PAC_APGAKEY			(1UL << 4)
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 123bd73046ec..64b5a230f38d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -121,6 +121,9 @@
 #ifndef SVE_GET_VL
 # define SVE_GET_VL()		(-EINVAL)
 #endif
+#ifndef PAC_RESET_KEYS
+# define PAC_RESET_KEYS(a, b)	(-EINVAL)
+#endif
 
 /*
  * this is where the system-wide overflow UID and GID are defined, for
@@ -2476,6 +2479,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 			return -EINVAL;
 		error = arch_prctl_spec_ctrl_set(me, arg2, arg3);
 		break;
+	case PR_PAC_RESET_KEYS:
+		if (arg3 || arg4 || arg5)
+			return -EINVAL;
+		error = PAC_RESET_KEYS(me, arg2);
+		break;
 	default:
 		error = -EINVAL;
 		break;
-- 
cgit 


From 84931327a807a4dd65d0d6b53a8ae47845c91f79 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 13 Dec 2018 13:14:06 +0000
Subject: arm64: ptr auth: Move per-thread keys from thread_info to
 thread_struct

We don't need to get at the per-thread keys from assembly at all, so
they can live alongside the rest of the per-thread register state in
thread_struct instead of thread_info.

This will also allow straighforward whitelisting of the keys for
hardened usercopy should we expose them via a ptrace request later on.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/pointer_auth.h | 6 +++---
 arch/arm64/include/asm/processor.h    | 3 +++
 arch/arm64/include/asm/thread_info.h  | 4 ----
 arch/arm64/kernel/pointer_auth.c      | 2 +-
 4 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index 80eb03afd677..15d49515efdd 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -80,12 +80,12 @@ static inline unsigned long ptrauth_strip_insn_pac(unsigned long ptr)
 #define ptrauth_thread_init_user(tsk)					\
 do {									\
 	struct task_struct *__ptiu_tsk = (tsk);				\
-	ptrauth_keys_init(&__ptiu_tsk->thread_info.keys_user);		\
-	ptrauth_keys_switch(&__ptiu_tsk->thread_info.keys_user);	\
+	ptrauth_keys_init(&__ptiu_tsk->thread.keys_user);		\
+	ptrauth_keys_switch(&__ptiu_tsk->thread.keys_user);		\
 } while (0)
 
 #define ptrauth_thread_switch(tsk)	\
-	ptrauth_keys_switch(&(tsk)->thread_info.keys_user)
+	ptrauth_keys_switch(&(tsk)->thread.keys_user)
 
 #else /* CONFIG_ARM64_PTR_AUTH */
 #define ptrauth_prctl_reset_keys(tsk, arg)	(-EINVAL)
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 142c708cb429..f1a7ab18faf3 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -147,6 +147,9 @@ struct thread_struct {
 	unsigned long		fault_address;	/* fault info */
 	unsigned long		fault_code;	/* ESR_EL1 value */
 	struct debug_info	debug;		/* debugging */
+#ifdef CONFIG_ARM64_PTR_AUTH
+	struct ptrauth_keys	keys_user;
+#endif
 };
 
 static inline void arch_thread_struct_whitelist(unsigned long *offset,
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f8f66ad9dd8f..bbca68b54732 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -28,7 +28,6 @@
 struct task_struct;
 
 #include <asm/memory.h>
-#include <asm/pointer_auth.h>
 #include <asm/stack_pointer.h>
 #include <asm/types.h>
 
@@ -55,9 +54,6 @@ struct thread_info {
 #endif
 		} preempt;
 	};
-#ifdef CONFIG_ARM64_PTR_AUTH
-	struct ptrauth_keys	keys_user;
-#endif
 };
 
 #define thread_saved_pc(tsk)	\
diff --git a/arch/arm64/kernel/pointer_auth.c b/arch/arm64/kernel/pointer_auth.c
index b9f6f5f3409a..c507b584259d 100644
--- a/arch/arm64/kernel/pointer_auth.c
+++ b/arch/arm64/kernel/pointer_auth.c
@@ -9,7 +9,7 @@
 
 int ptrauth_prctl_reset_keys(struct task_struct *tsk, unsigned long arg)
 {
-	struct ptrauth_keys *keys = &tsk->thread_info.keys_user;
+	struct ptrauth_keys *keys = &tsk->thread.keys_user;
 	unsigned long addr_key_mask = PR_PAC_APIAKEY | PR_PAC_APIBKEY |
 				      PR_PAC_APDAKEY | PR_PAC_APDBKEY;
 	unsigned long key_mask = addr_key_mask | PR_PAC_APGAKEY;
-- 
cgit 


From a56005d3210500f8a166fcb83cbb5ac5d0f909e4 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 12 Dec 2018 15:52:02 +0000
Subject: arm64: cpufeature: Reduce number of pointer auth CPU caps from 6 to 4

We can easily avoid defining the two meta-capabilities for the address
and generic keys, so remove them and instead just check both of the
architected and impdef capabilities when determining the level of system
support.

Reviewed-by: Suzuki Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cpucaps.h    |  8 +++-----
 arch/arm64/include/asm/cpufeature.h |  6 ++++--
 arch/arm64/kernel/cpufeature.c      | 11 +----------
 3 files changed, 8 insertions(+), 17 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 803f388e81d4..82e9099834ae 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -58,11 +58,9 @@
 #define ARM64_WORKAROUND_1165522		37
 #define ARM64_HAS_ADDRESS_AUTH_ARCH		38
 #define ARM64_HAS_ADDRESS_AUTH_IMP_DEF		39
-#define ARM64_HAS_ADDRESS_AUTH			40
-#define ARM64_HAS_GENERIC_AUTH_ARCH		41
-#define ARM64_HAS_GENERIC_AUTH_IMP_DEF		42
-#define ARM64_HAS_GENERIC_AUTH			43
+#define ARM64_HAS_GENERIC_AUTH_ARCH		40
+#define ARM64_HAS_GENERIC_AUTH_IMP_DEF		41
 
-#define ARM64_NCAPS				44
+#define ARM64_NCAPS				42
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index daec4b50d7fb..1e7fcd12b1c1 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -568,13 +568,15 @@ static inline bool system_supports_cnp(void)
 static inline bool system_supports_address_auth(void)
 {
 	return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) &&
-		cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH);
+		(cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH_ARCH) ||
+		 cpus_have_const_cap(ARM64_HAS_ADDRESS_AUTH_IMP_DEF));
 }
 
 static inline bool system_supports_generic_auth(void)
 {
 	return IS_ENABLED(CONFIG_ARM64_PTR_AUTH) &&
-		cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH);
+		(cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH_ARCH) ||
+		 cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF));
 }
 
 #define ARM64_SSBD_UNKNOWN		-1
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index f15000872e08..e0c76622f630 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1469,6 +1469,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.field_pos = ID_AA64ISAR1_APA_SHIFT,
 		.min_field_value = ID_AA64ISAR1_APA_ARCHITECTED,
 		.matches = has_cpuid_feature,
+		.cpu_enable = cpu_enable_address_auth,
 	},
 	{
 		.desc = "Address authentication (IMP DEF algorithm)",
@@ -1479,11 +1480,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.field_pos = ID_AA64ISAR1_API_SHIFT,
 		.min_field_value = ID_AA64ISAR1_API_IMP_DEF,
 		.matches = has_cpuid_feature,
-	},
-	{
-		.capability = ARM64_HAS_ADDRESS_AUTH,
-		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
-		.matches = has_address_auth,
 		.cpu_enable = cpu_enable_address_auth,
 	},
 	{
@@ -1506,11 +1502,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = ID_AA64ISAR1_GPI_IMP_DEF,
 		.matches = has_cpuid_feature,
 	},
-	{
-		.capability = ARM64_HAS_GENERIC_AUTH,
-		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
-		.matches = has_generic_auth,
-	},
 #endif /* CONFIG_ARM64_PTR_AUTH */
 	{},
 };
-- 
cgit 


From 1e013d06120cbf67e771848fc5d98174c33b078a Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 12 Dec 2018 15:53:54 +0000
Subject: arm64: cpufeature: Rework ptr auth hwcaps using
 multi_entry_cap_matches

Open-coding the pointer-auth HWCAPs is a mess and can be avoided by
reusing the multi-cap logic from the CPU errata framework.

Move the multi_entry_cap_matches code to cpufeature.h and reuse it for
the pointer auth HWCAPs.

Reviewed-by: Suzuki Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/cpufeature.h | 59 ++++++++++++++++++-----
 arch/arm64/kernel/cpu_errata.c      | 34 +------------
 arch/arm64/kernel/cpufeature.c      | 95 ++++++++++++++++++++-----------------
 3 files changed, 99 insertions(+), 89 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 1e7fcd12b1c1..dfcfba725d72 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -321,19 +321,20 @@ struct arm64_cpu_capabilities {
 			bool sign;
 			unsigned long hwcap;
 		};
-		/*
-		 * A list of "matches/cpu_enable" pair for the same
-		 * "capability" of the same "type" as described by the parent.
-		 * Only matches(), cpu_enable() and fields relevant to these
-		 * methods are significant in the list. The cpu_enable is
-		 * invoked only if the corresponding entry "matches()".
-		 * However, if a cpu_enable() method is associated
-		 * with multiple matches(), care should be taken that either
-		 * the match criteria are mutually exclusive, or that the
-		 * method is robust against being called multiple times.
-		 */
-		const struct arm64_cpu_capabilities *match_list;
 	};
+
+	/*
+	 * An optional list of "matches/cpu_enable" pair for the same
+	 * "capability" of the same "type" as described by the parent.
+	 * Only matches(), cpu_enable() and fields relevant to these
+	 * methods are significant in the list. The cpu_enable is
+	 * invoked only if the corresponding entry "matches()".
+	 * However, if a cpu_enable() method is associated
+	 * with multiple matches(), care should be taken that either
+	 * the match criteria are mutually exclusive, or that the
+	 * method is robust against being called multiple times.
+	 */
+	const struct arm64_cpu_capabilities *match_list;
 };
 
 static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap)
@@ -353,6 +354,39 @@ cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap)
 	return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU);
 }
 
+/*
+ * Generic helper for handling capabilties with multiple (match,enable) pairs
+ * of call backs, sharing the same capability bit.
+ * Iterate over each entry to see if at least one matches.
+ */
+static inline bool
+cpucap_multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry,
+			       int scope)
+{
+	const struct arm64_cpu_capabilities *caps;
+
+	for (caps = entry->match_list; caps->matches; caps++)
+		if (caps->matches(caps, scope))
+			return true;
+
+	return false;
+}
+
+/*
+ * Take appropriate action for all matching entries in the shared capability
+ * entry.
+ */
+static inline void
+cpucap_multi_entry_cap_cpu_enable(const struct arm64_cpu_capabilities *entry)
+{
+	const struct arm64_cpu_capabilities *caps;
+
+	for (caps = entry->match_list; caps->matches; caps++)
+		if (caps->matches(caps, SCOPE_LOCAL_CPU) &&
+		    caps->cpu_enable)
+			caps->cpu_enable(caps);
+}
+
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
 extern struct static_key_false arm64_const_caps_ready;
@@ -476,7 +510,6 @@ static inline bool id_aa64pfr0_sve(u64 pfr0)
 void __init setup_cpu_features(void);
 void check_local_cpu_capabilities(void);
 
-
 u64 read_sanitised_ftr_reg(u32 id);
 
 static inline bool cpu_supports_mixed_endian_el0(void)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index e3123055e552..ff2fda3a98e1 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -507,38 +507,6 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
 	.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,			\
 	CAP_MIDR_RANGE_LIST(midr_list)
 
-/*
- * Generic helper for handling capabilties with multiple (match,enable) pairs
- * of call backs, sharing the same capability bit.
- * Iterate over each entry to see if at least one matches.
- */
-static bool __maybe_unused
-multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, int scope)
-{
-	const struct arm64_cpu_capabilities *caps;
-
-	for (caps = entry->match_list; caps->matches; caps++)
-		if (caps->matches(caps, scope))
-			return true;
-
-	return false;
-}
-
-/*
- * Take appropriate action for all matching entries in the shared capability
- * entry.
- */
-static void __maybe_unused
-multi_entry_cap_cpu_enable(const struct arm64_cpu_capabilities *entry)
-{
-	const struct arm64_cpu_capabilities *caps;
-
-	for (caps = entry->match_list; caps->matches; caps++)
-		if (caps->matches(caps, SCOPE_LOCAL_CPU) &&
-		    caps->cpu_enable)
-			caps->cpu_enable(caps);
-}
-
 #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
 
 /*
@@ -700,7 +668,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 	{
 		.desc = "Qualcomm Technologies Falkor/Kryo erratum 1003",
 		.capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003,
-		.matches = multi_entry_cap_matches,
+		.matches = cpucap_multi_entry_cap_matches,
 		.match_list = qcom_erratum_1003_list,
 	},
 #endif
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index e0c76622f630..7ea0b2f20262 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -1196,34 +1196,6 @@ static void cpu_enable_address_auth(struct arm64_cpu_capabilities const *cap)
 	sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ENIA | SCTLR_ELx_ENIB |
 				       SCTLR_ELx_ENDA | SCTLR_ELx_ENDB);
 }
-
-static bool has_address_auth(const struct arm64_cpu_capabilities *entry,
-			     int __unused)
-{
-	u64 isar1 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
-	bool api, apa;
-
-	apa = cpuid_feature_extract_unsigned_field(isar1,
-					ID_AA64ISAR1_APA_SHIFT) > 0;
-	api = cpuid_feature_extract_unsigned_field(isar1,
-					ID_AA64ISAR1_API_SHIFT) > 0;
-
-	return apa || api;
-}
-
-static bool has_generic_auth(const struct arm64_cpu_capabilities *entry,
-			     int __unused)
-{
-	u64 isar1 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
-	bool gpi, gpa;
-
-	gpa = cpuid_feature_extract_unsigned_field(isar1,
-					ID_AA64ISAR1_GPA_SHIFT) > 0;
-	gpi = cpuid_feature_extract_unsigned_field(isar1,
-					ID_AA64ISAR1_GPI_SHIFT) > 0;
-
-	return gpa || gpi;
-}
 #endif /* CONFIG_ARM64_PTR_AUTH */
 
 static const struct arm64_cpu_capabilities arm64_features[] = {
@@ -1506,19 +1478,58 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 	{},
 };
 
-#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap)	\
-	{							\
-		.desc = #cap,					\
-		.type = ARM64_CPUCAP_SYSTEM_FEATURE,		\
-		.matches = has_cpuid_feature,			\
-		.sys_reg = reg,					\
-		.field_pos = field,				\
-		.sign = s,					\
-		.min_field_value = min_value,			\
-		.hwcap_type = cap_type,				\
-		.hwcap = cap,					\
+#define HWCAP_CPUID_MATCH(reg, field, s, min_value)				\
+		.matches = has_cpuid_feature,					\
+		.sys_reg = reg,							\
+		.field_pos = field,						\
+		.sign = s,							\
+		.min_field_value = min_value,
+
+#define __HWCAP_CAP(name, cap_type, cap)					\
+		.desc = name,							\
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,				\
+		.hwcap_type = cap_type,						\
+		.hwcap = cap,							\
+
+#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap)			\
+	{									\
+		__HWCAP_CAP(#cap, cap_type, cap)				\
+		HWCAP_CPUID_MATCH(reg, field, s, min_value)			\
 	}
 
+#define HWCAP_MULTI_CAP(list, cap_type, cap)					\
+	{									\
+		__HWCAP_CAP(#cap, cap_type, cap)				\
+		.matches = cpucap_multi_entry_cap_matches,			\
+		.match_list = list,						\
+	}
+
+#ifdef CONFIG_ARM64_PTR_AUTH
+static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = {
+	{
+		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT,
+				  FTR_UNSIGNED, ID_AA64ISAR1_APA_ARCHITECTED)
+	},
+	{
+		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT,
+				  FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF)
+	},
+	{},
+};
+
+static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
+	{
+		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT,
+				  FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED)
+	},
+	{
+		HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT,
+				  FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF)
+	},
+	{},
+};
+#endif
+
 static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL),
 	HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
@@ -1551,10 +1562,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
 #endif
 	HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS),
 #ifdef CONFIG_ARM64_PTR_AUTH
-	{ .desc = "HWCAP_PACA", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_address_auth,
-		.hwcap_type = CAP_HWCAP, .hwcap = HWCAP_PACA },
-	{ .desc = "HWCAP_PACG", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_generic_auth,
-		.hwcap_type = CAP_HWCAP, .hwcap = HWCAP_PACG },
+	HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, HWCAP_PACA),
+	HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, HWCAP_PACG),
 #endif
 	{},
 };
-- 
cgit 


From 97bebc5facf7d495e72df0b7c50be3699c7bdd73 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 11 Dec 2018 16:42:31 +0000
Subject: arm64: sysreg: Use _BITUL() when defining register bits

Using shifts directly is error-prone and can cause inadvertent sign
extensions or build problems with older versions of binutils.

Consistent use of the _BITUL() macro makes these problems disappear.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/include/asm/sysreg.h | 81 +++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 40 deletions(-)

(limited to 'arch/arm64/include/asm')

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index bb15d09daf56..0e8104f6c660 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -20,6 +20,7 @@
 #ifndef __ASM_SYSREG_H
 #define __ASM_SYSREG_H
 
+#include <linux/const.h>
 #include <linux/stringify.h>
 
 /*
@@ -449,31 +450,31 @@
 #define SYS_ICH_LR15_EL2		__SYS__LR8_EL2(7)
 
 /* Common SCTLR_ELx flags. */
-#define SCTLR_ELx_DSSBS	(1UL << 44)
-#define SCTLR_ELx_ENIA	(1U << 31)
-#define SCTLR_ELx_ENIB	(1 << 30)
-#define SCTLR_ELx_ENDA	(1 << 27)
-#define SCTLR_ELx_EE    (1 << 25)
-#define SCTLR_ELx_IESB	(1 << 21)
-#define SCTLR_ELx_WXN	(1 << 19)
-#define SCTLR_ELx_ENDB	(1 << 13)
-#define SCTLR_ELx_I	(1 << 12)
-#define SCTLR_ELx_SA	(1 << 3)
-#define SCTLR_ELx_C	(1 << 2)
-#define SCTLR_ELx_A	(1 << 1)
-#define SCTLR_ELx_M	1
+#define SCTLR_ELx_DSSBS	(_BITUL(44))
+#define SCTLR_ELx_ENIA	(_BITUL(31))
+#define SCTLR_ELx_ENIB	(_BITUL(30))
+#define SCTLR_ELx_ENDA	(_BITUL(27))
+#define SCTLR_ELx_EE    (_BITUL(25))
+#define SCTLR_ELx_IESB	(_BITUL(21))
+#define SCTLR_ELx_WXN	(_BITUL(19))
+#define SCTLR_ELx_ENDB	(_BITUL(13))
+#define SCTLR_ELx_I	(_BITUL(12))
+#define SCTLR_ELx_SA	(_BITUL(3))
+#define SCTLR_ELx_C	(_BITUL(2))
+#define SCTLR_ELx_A	(_BITUL(1))
+#define SCTLR_ELx_M	(_BITUL(0))
 
 #define SCTLR_ELx_FLAGS	(SCTLR_ELx_M  | SCTLR_ELx_A | SCTLR_ELx_C | \
 			 SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB)
 
 /* SCTLR_EL2 specific flags. */
-#define SCTLR_EL2_RES1	((1 << 4)  | (1 << 5)  | (1 << 11) | (1 << 16) | \
-			 (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \
-			 (1 << 29))
-#define SCTLR_EL2_RES0	((1 << 6)  | (1 << 7)  | (1 << 8)  | (1 << 9)  | \
-			 (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \
-			 (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \
-			 (1 << 27) | (1 << 30) | (1 << 31) | \
+#define SCTLR_EL2_RES1	((_BITUL(4))  | (_BITUL(5))  | (_BITUL(11)) | (_BITUL(16)) | \
+			 (_BITUL(18)) | (_BITUL(22)) | (_BITUL(23)) | (_BITUL(28)) | \
+			 (_BITUL(29)))
+#define SCTLR_EL2_RES0	((_BITUL(6))  | (_BITUL(7))  | (_BITUL(8))  | (_BITUL(9))  | \
+			 (_BITUL(10)) | (_BITUL(13)) | (_BITUL(14)) | (_BITUL(15)) | \
+			 (_BITUL(17)) | (_BITUL(20)) | (_BITUL(24)) | (_BITUL(26)) | \
+			 (_BITUL(27)) | (_BITUL(30)) | (_BITUL(31)) | \
 			 (0xffffefffUL << 32))
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
@@ -495,23 +496,23 @@
 #endif
 
 /* SCTLR_EL1 specific flags. */
-#define SCTLR_EL1_UCI		(1 << 26)
-#define SCTLR_EL1_E0E		(1 << 24)
-#define SCTLR_EL1_SPAN		(1 << 23)
-#define SCTLR_EL1_NTWE		(1 << 18)
-#define SCTLR_EL1_NTWI		(1 << 16)
-#define SCTLR_EL1_UCT		(1 << 15)
-#define SCTLR_EL1_DZE		(1 << 14)
-#define SCTLR_EL1_UMA		(1 << 9)
-#define SCTLR_EL1_SED		(1 << 8)
-#define SCTLR_EL1_ITD		(1 << 7)
-#define SCTLR_EL1_CP15BEN	(1 << 5)
-#define SCTLR_EL1_SA0		(1 << 4)
-
-#define SCTLR_EL1_RES1	((1 << 11) | (1 << 20) | (1 << 22) | (1 << 28) | \
-			 (1 << 29))
-#define SCTLR_EL1_RES0  ((1 << 6)  | (1 << 10) | (1 << 13) | (1 << 17) | \
-			 (1 << 27) | (1 << 30) | (1 << 31) | \
+#define SCTLR_EL1_UCI		(_BITUL(26))
+#define SCTLR_EL1_E0E		(_BITUL(24))
+#define SCTLR_EL1_SPAN		(_BITUL(23))
+#define SCTLR_EL1_NTWE		(_BITUL(18))
+#define SCTLR_EL1_NTWI		(_BITUL(16))
+#define SCTLR_EL1_UCT		(_BITUL(15))
+#define SCTLR_EL1_DZE		(_BITUL(14))
+#define SCTLR_EL1_UMA		(_BITUL(9))
+#define SCTLR_EL1_SED		(_BITUL(8))
+#define SCTLR_EL1_ITD		(_BITUL(7))
+#define SCTLR_EL1_CP15BEN	(_BITUL(5))
+#define SCTLR_EL1_SA0		(_BITUL(4))
+
+#define SCTLR_EL1_RES1	((_BITUL(11)) | (_BITUL(20)) | (_BITUL(22)) | (_BITUL(28)) | \
+			 (_BITUL(29)))
+#define SCTLR_EL1_RES0  ((_BITUL(6))  | (_BITUL(10)) | (_BITUL(13)) | (_BITUL(17)) | \
+			 (_BITUL(27)) | (_BITUL(30)) | (_BITUL(31)) | \
 			 (0xffffefffUL << 32))
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
@@ -712,13 +713,13 @@
 #define ZCR_ELx_LEN_SIZE	9
 #define ZCR_ELx_LEN_MASK	0x1ff
 
-#define CPACR_EL1_ZEN_EL1EN	(1 << 16) /* enable EL1 access */
-#define CPACR_EL1_ZEN_EL0EN	(1 << 17) /* enable EL0 access, if EL1EN set */
+#define CPACR_EL1_ZEN_EL1EN	(_BITUL(16)) /* enable EL1 access */
+#define CPACR_EL1_ZEN_EL0EN	(_BITUL(17)) /* enable EL0 access, if EL1EN set */
 #define CPACR_EL1_ZEN		(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
 
 
 /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
-#define SYS_MPIDR_SAFE_VAL		(1UL << 31)
+#define SYS_MPIDR_SAFE_VAL	(_BITUL(31))
 
 #ifdef __ASSEMBLY__
 
-- 
cgit