aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc/include/asm/text-patching.h
diff options
context:
space:
mode:
authorLinus Torvalds <[email protected]>2024-11-23 09:58:07 -0800
committerLinus Torvalds <[email protected]>2024-11-23 09:58:07 -0800
commit5c00ff742bf5caf85f60e1c73999f99376fb865d (patch)
treefa484e83c27af79f1c0511e7e0673507461c9379 /arch/powerpc/include/asm/text-patching.h
parent228a1157fb9fec47eb135b51c0202b574e079ebf (diff)
parent2532e6c74a67e65b95f310946e0c0e0a41b3a34b (diff)
Merge tag 'mm-stable-2024-11-18-19-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - The series "zram: optimal post-processing target selection" from Sergey Senozhatsky improves zram's post-processing selection algorithm. This leads to improved memory savings. - Wei Yang has gone to town on the mapletree code, contributing several series which clean up the implementation: - "refine mas_mab_cp()" - "Reduce the space to be cleared for maple_big_node" - "maple_tree: simplify mas_push_node()" - "Following cleanup after introduce mas_wr_store_type()" - "refine storing null" - The series "selftests/mm: hugetlb_fault_after_madv improvements" from David Hildenbrand fixes this selftest for s390. - The series "introduce pte_offset_map_{ro|rw}_nolock()" from Qi Zheng implements some rationalizations and cleanups in the page mapping code. - The series "mm: optimize shadow entries removal" from Shakeel Butt optimizes the file truncation code by speeding up the handling of shadow entries. - The series "Remove PageKsm()" from Matthew Wilcox completes the migration of this flag over to being a folio-based flag. - The series "Unify hugetlb into arch_get_unmapped_area functions" from Oscar Salvador implements a bunch of consolidations and cleanups in the hugetlb code. - The series "Do not shatter hugezeropage on wp-fault" from Dev Jain takes away the wp-fault time practice of turning a huge zero page into small pages. Instead we replace the whole thing with a THP. More consistent, cleaner, and potentially saves a large number of pagefaults. - The series "percpu: Add a test case and fix for clang" from Andy Shevchenko enhances and fixes the kernel's built-in percpu test code. - The series "mm/mremap: Remove extra vma tree walk" from Liam Howlett optimizes mremap() by avoiding doing things which we didn't need to do. - The series "Improve the tmpfs large folio read performance" from Baolin Wang teaches tmpfs to copy data into userspace at the folio size rather than as individual pages. A 20% speedup was observed.
- The series "mm/damon/vaddr: Fix issue in damon_va_evenly_split_region()" from Zheng Yejian fixes DAMON splitting. - The series "memcg-v1: fully deprecate charge moving" from Shakeel Butt removes the long-deprecated memcg-v1 charge moving feature. - The series "fix error handling in mmap_region() and refactor" from Lorenzo Stoakes cleans up some of the mmap() error handling and addresses some potential performance issues. - The series "x86/module: use large ROX pages for text allocations" from Mike Rapoport teaches x86 to use large pages for read-only-execute module text. - The series "page allocation tag compression" from Suren Baghdasaryan is follow-on maintenance work for the new page allocation profiling feature. - The series "page->index removals in mm" from Matthew Wilcox removes most references to page->index in mm/. A slow march towards shrinking struct page. - The series "damon/{self,kunit}tests: minor fixups for DAMON debugfs interface tests" from Andrew Paniakin performs maintenance work for DAMON's self testing code. - The series "mm: zswap swap-out of large folios" from Kanchana Sridhar improves zswap's batching of compression and decompression. It is a step along the way towards using Intel IAA hardware acceleration for this zswap operation. - The series "kasan: migrate the last module test to kunit" from Sabyrzhan Tasbolatov completes the migration of the KASAN built-in tests over to the KUnit framework. - The series "implement lightweight guard pages" from Lorenzo Stoakes permits userspace to place fault-generating guard pages within a single VMA, rather than requiring that multiple VMAs be created for this. Improved efficiencies for userspace memory allocators are expected. - The series "memcg: tracepoint for flushing stats" from JP Kobryn uses tracepoints to provide increased visibility into memcg stats flushing activity. - The series "zram: IDLE flag handling fixes" from Sergey Senozhatsky fixes a zram buglet which potentially affected performance.
- The series "mm: add more kernel parameters to control mTHP" from Maíra Canal enhances our ability to control/configure multisize THP from the kernel boot command line. - The series "kasan: few improvements on kunit tests" from Sabyrzhan Tasbolatov has a couple of fixups for the KASAN KUnit tests. - The series "mm/list_lru: Split list_lru lock into per-cgroup scope" from Kairui Song optimizes list_lru memory utilization when lockdep is enabled. * tag 'mm-stable-2024-11-18-19-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (215 commits) cma: enforce non-zero pageblock_order during cma_init_reserved_mem() mm/kfence: add a new kunit test test_use_after_free_read_nofault() zram: fix NULL pointer in comp_algorithm_show() memcg/hugetlb: add hugeTLB counters to memcg vmstat: call fold_vm_zone_numa_events() before show per zone NUMA event mm: mmap_lock: check trace_mmap_lock_$type_enabled() instead of regcount zram: ZRAM_DEF_COMP should depend on ZRAM MAINTAINERS/MEMORY MANAGEMENT: add document files for mm Docs/mm/damon: recommend academic papers to read and/or cite mm: define general function pXd_init() kmemleak: iommu/iova: fix transient kmemleak false positive mm/list_lru: simplify the list_lru walk callback function mm/list_lru: split the lock to per-cgroup scope mm/list_lru: simplify reparenting and initial allocation mm/list_lru: code clean up for reparenting mm/list_lru: don't export list_lru_add mm/list_lru: don't pass unnecessary key parameters kasan: add kunit tests for kmalloc_track_caller, kmalloc_node_track_caller kasan: change kasan_atomics kunit test as KUNIT_CASE_SLOW kasan: use EXPORT_SYMBOL_IF_KUNIT to export symbols ...
Diffstat (limited to 'arch/powerpc/include/asm/text-patching.h')
-rw-r--r--arch/powerpc/include/asm/text-patching.h275
1 files changed, 275 insertions, 0 deletions
diff --git a/arch/powerpc/include/asm/text-patching.h b/arch/powerpc/include/asm/text-patching.h
new file mode 100644
index 000000000000..e7f14720f630
--- /dev/null
+++ b/arch/powerpc/include/asm/text-patching.h
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_CODE_PATCHING_H
+#define _ASM_POWERPC_CODE_PATCHING_H
+
+/*
+ * Copyright 2008, Michael Ellerman, IBM Corporation.
+ */
+
+#include <asm/types.h>
+#include <asm/ppc-opcode.h>
+#include <linux/string.h>
+#include <linux/kallsyms.h>
+#include <asm/asm-compat.h>
+#include <asm/inst.h>
+
+/*
+ * Flags for create_branch(). The flag value is OR'ed into the low two bits
+ * of the generated instruction word (the AA and LK bits):
+ *
+ *   "b"   == create_branch(&instr, addr, target, 0);
+ *   "ba"  == create_branch(&instr, addr, target, BRANCH_ABSOLUTE);
+ *   "bl"  == create_branch(&instr, addr, target, BRANCH_SET_LINK);
+ *   "bla" == create_branch(&instr, addr, target,
+ *                          BRANCH_ABSOLUTE | BRANCH_SET_LINK);
+ */
+#define BRANCH_SET_LINK 0x1
+#define BRANCH_ABSOLUTE 0x2
+
+/*
+ * The PowerPC branch instruction is laid out as:
+ *
+ *  0         6                30  31
+ *  +---------+----------------+---+---+
+ *  | opcode  |       LI       |AA |LK |
+ *  +---------+----------------+---+---+
+ *  Where AA = 0 and LK = 0
+ *
+ * LI is a signed 24-bit integer. The real branch offset is computed
+ * by: imm32 = SignExtend(LI:'0b00', 32);
+ *
+ * So the maximum forward branch should be:
+ *   (0x007fffff << 2) = 0x01fffffc =  0x1fffffc
+ * The maximum backward branch should be:
+ *   (0xff800000 << 2) = 0xfe000000 = -0x2000000
+ */
+static inline bool is_offset_in_branch_range(long offset)
+{
+	/* The two low bits are not encoded, so the offset must be word aligned. */
+	if (offset & 0x3)
+		return false;
+
+	/* Signed 26-bit byte displacement: [-0x2000000, 0x1fffffc]. */
+	return offset >= -0x2000000 && offset <= 0x1fffffc;
+}
+
+/*
+ * Return true when @offset is a multiple of four that lies within
+ * [-0x8000, 0x7fff], i.e. fits a signed 16-bit byte displacement.
+ */
+static inline bool is_offset_in_cond_branch_range(long offset)
+{
+	if (offset < -0x8000 || offset > 0x7fff)
+		return false;
+
+	return (offset & 0x3) == 0;
+}
+
+/*
+ * Build an unconditional branch instruction in *@instr.
+ *
+ * @instr:  receives the encoded instruction; it is set to ppc_inst(0) first
+ *          and stays that way when the function fails.
+ * @addr:   address the branch is meant for; used as the base of a relative
+ *          branch.
+ * @target: destination address.
+ * @flags:  combination of BRANCH_ABSOLUTE and BRANCH_SET_LINK.
+ *
+ * Returns 0 on success, 1 if the displacement does not fit the encoding.
+ */
+static inline int create_branch(ppc_inst_t *instr, const u32 *addr,
+				unsigned long target, int flags)
+{
+	long disp = target;
+
+	*instr = ppc_inst(0);
+
+	/* A relative branch encodes the distance from the branch itself. */
+	if ((flags & BRANCH_ABSOLUTE) == 0)
+		disp -= (unsigned long)addr;
+
+	/* Bail out if the instruction format cannot express the target. */
+	if (!is_offset_in_branch_range(disp))
+		return 1;
+
+	/* Mask both fields so flags and displacement cannot overlap. */
+	*instr = ppc_inst(0x48000000 | (flags & 0x3) | (disp & 0x03FFFFFC));
+
+	return 0;
+}
+
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
+ unsigned long target, int flags);
+int patch_branch(u32 *addr, unsigned long target, int flags);
+int patch_instruction(u32 *addr, ppc_inst_t instr);
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
+int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr);
+
+/*
+ * The data patching functions patch_uint() and patch_ulong(), etc., must be
+ * called on aligned addresses.
+ *
+ * The instruction patching functions patch_instruction() and similar must be
+ * called on addresses satisfying instruction alignment requirements.
+ */
+
+#ifdef CONFIG_PPC64
+
+int patch_uint(void *addr, unsigned int val);
+int patch_ulong(void *addr, unsigned long val);
+
+#define patch_u64 patch_ulong
+
+#else
+
+/*
+ * Patch an unsigned int at @addr by going through patch_instruction().
+ * Returns -EINVAL when @addr is not aligned to sizeof(unsigned int),
+ * otherwise whatever patch_instruction() returns.
+ */
+static inline int patch_uint(void *addr, unsigned int val)
+{
+	if (IS_ALIGNED((unsigned long)addr, sizeof(unsigned int)))
+		return patch_instruction(addr, ppc_inst(val));
+
+	return -EINVAL;
+}
+
+/*
+ * Patch an unsigned long at @addr by going through patch_instruction().
+ * Returns -EINVAL when @addr is not aligned to sizeof(unsigned long),
+ * otherwise whatever patch_instruction() returns.
+ */
+static inline int patch_ulong(void *addr, unsigned long val)
+{
+	unsigned long where = (unsigned long)addr;
+
+	if (!IS_ALIGNED(where, sizeof(unsigned long)))
+		return -EINVAL;
+
+	return patch_instruction(addr, ppc_inst(val));
+}
+
+#endif
+
+#define patch_u32 patch_uint
+
+/*
+ * Resolve a patch site: *@site holds a signed 32-bit offset that is added
+ * to the address of @site itself.
+ */
+static inline unsigned long patch_site_addr(s32 *site)
+{
+	unsigned long base = (unsigned long)site;
+
+	return base + *site;
+}
+
+/* patch_instruction() on the address that @site resolves to. */
+static inline int patch_instruction_site(s32 *site, ppc_inst_t instr)
+{
+	u32 *where = (u32 *)patch_site_addr(site);
+
+	return patch_instruction(where, instr);
+}
+
+/* patch_branch() on the address that @site resolves to. */
+static inline int patch_branch_site(s32 *site, unsigned long target, int flags)
+{
+	u32 *where = (u32 *)patch_site_addr(site);
+
+	return patch_branch(where, target, flags);
+}
+
+/*
+ * Read the word at @addr, clear the bits in @clr, set the bits in @set and
+ * write the result back with patch_instruction().
+ */
+static inline int modify_instruction(unsigned int *addr, unsigned int clr,
+				     unsigned int set)
+{
+	unsigned int updated = (*addr & ~clr) | set;
+
+	return patch_instruction(addr, ppc_inst(updated));
+}
+
+/* modify_instruction() on the address that @site resolves to. */
+static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int set)
+{
+	unsigned int *where = (unsigned int *)patch_site_addr(site);
+
+	return modify_instruction(where, clr, set);
+}
+
+/* Primary opcode of @instr, limited to its low six bits. */
+static inline unsigned int branch_opcode(ppc_inst_t instr)
+{
+	const unsigned int six_bits = 0x3F;
+
+	return ppc_inst_primary_opcode(instr) & six_bits;
+}
+
+/*
+ * Non-zero when @instr has primary opcode 18, the opcode that
+ * create_branch() emits (0x48000000 >> 26).
+ */
+static inline int instr_is_branch_iform(ppc_inst_t instr)
+{
+	const unsigned int opcode = branch_opcode(instr);
+
+	return opcode == 18;
+}
+
+/* Non-zero when @instr has primary opcode 16. */
+static inline int instr_is_branch_bform(ppc_inst_t instr)
+{
+	const unsigned int opcode = branch_opcode(instr);
+
+	return opcode == 16;
+}
+
+int instr_is_relative_branch(ppc_inst_t instr);
+int instr_is_relative_link_branch(ppc_inst_t instr);
+unsigned long branch_target(const u32 *instr);
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src);
+bool is_conditional_branch(ppc_inst_t instr);
+
+#define OP_RT_RA_MASK 0xffff0000UL	/* keeps the upper 16 bits, drops the low 16 (the "XXXX" immediate) */
+#define LIS_R2 (PPC_RAW_LIS(_R2, 0))	/* pattern for "lis r2,XXXX" after masking */
+#define ADDIS_R2_R12 (PPC_RAW_ADDIS(_R2, _R12, 0))	/* pattern for "addis r2,r12,XXXX" after masking */
+#define ADDI_R2_R2 (PPC_RAW_ADDI(_R2, _R2, 0))	/* pattern for "addi r2,r2,XXXX" after masking */
+
+
+/*
+ * Translate a function pointer into the address that should be used when
+ * patching the function.
+ */
+static inline unsigned long ppc_function_entry(void *func)
+{
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+	u32 *insn = func;
+	unsigned long first = *insn & OP_RT_RA_MASK;
+
+	/*
+	 * A PPC64 ABIv2 function may have both a global and a local entry
+	 * point. Patching wants the local one, so recognise the global entry
+	 * sequence and skip it. That sequence always looks like:
+	 *
+	 *	addis r2,r12,XXXX
+	 *	addi  r2,r2,XXXX
+	 *
+	 * unless a linker optimisation turned the addis into a lis:
+	 *
+	 *	lis   r2,XXXX
+	 *	addi  r2,r2,XXXX
+	 */
+	if (first != ADDIS_R2_R12 && first != LIS_R2)
+		return (unsigned long)func;
+
+	/* Only look at the second word once the first one has matched. */
+	if ((*(insn + 1) & OP_RT_RA_MASK) != ADDI_R2_R2)
+		return (unsigned long)func;
+
+	return (unsigned long)(insn + 2);
+#elif defined(CONFIG_PPC64_ELF_ABI_V1)
+	/*
+	 * With PPC64 ABIv1 a function pointer refers to the function's
+	 * descriptor, whose first entry is the address of the function text.
+	 */
+	struct func_desc *desc = func;
+
+	return desc->addr;
+#else
+	return (unsigned long)func;
+#endif
+}
+
+/* Address of the global entry point of @func. */
+static inline unsigned long ppc_global_function_entry(void *func)
+{
+#ifndef CONFIG_PPC64_ELF_ABI_V2
+	/* Outside ABIv2 this is the same as ppc_function_entry(). */
+	return ppc_function_entry(func);
+#else
+	/* On PPC64 ABIv2 the address itself is the global entry point. */
+	return (unsigned long)func;
+#endif
+}
+
+/*
+ * kallsyms_lookup_name() wrapper that yields a function entry address:
+ * - ABIv1: the dot variant of the symbol is looked up first.
+ * - ABIv2: the result is converted to the local entry point.
+ * - otherwise: plain kallsyms_lookup_name().
+ *
+ * Returns 0 when nothing is found (and, on ABIv1, when @name is
+ * KSYM_NAME_LEN characters or longer).
+ */
+static inline unsigned long ppc_kallsyms_lookup_name(const char *name)
+{
+	unsigned long addr;
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+	/* Room for an extra leading '.' in front of the symbol name. */
+	char dot_name[1 + KSYM_NAME_LEN];
+	bool prefixed;
+
+	if (strnlen(name, KSYM_NAME_LEN) >= KSYM_NAME_LEN)
+		return 0;
+
+	/* Build ".name" unless the caller already passed a dot symbol. */
+	prefixed = name[0] != '.';
+	dot_name[0] = '\0';
+	if (prefixed)
+		strlcat(dot_name, ".", sizeof(dot_name));
+	strlcat(dot_name, name, sizeof(dot_name));
+
+	addr = kallsyms_lookup_name(dot_name);
+	/* Fall back to the original non-dot symbol if we added the dot. */
+	if (addr == 0 && prefixed)
+		addr = kallsyms_lookup_name(name);
+#elif defined(CONFIG_PPC64_ELF_ABI_V2)
+	addr = kallsyms_lookup_name(name);
+	if (addr != 0)
+		addr = ppc_function_entry((void *)addr);
+#else
+	addr = kallsyms_lookup_name(name);
+#endif
+	return addr;
+}
+
+/*
+ * Some instruction encodings commonly used in dynamic ftracing
+ * and function live patching.
+ */
+
+/*
+ * Offset handed to PPC_RAW_LD() by PPC_INST_LD_TOC below; it differs between
+ * ABIv2 (24) and everything else (40).
+ * This must match the definition of STK_GOT in <asm/ppc_asm.h>
+ */
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+#define R2_STACK_OFFSET 24
+#else
+#define R2_STACK_OFFSET 40
+#endif
+
+#define PPC_INST_LD_TOC PPC_RAW_LD(_R2, _R1, R2_STACK_OFFSET)
+
+/*
+ * usually preceded by a mflr r0
+ * NOTE(review): presumably stores r0 at PPC_LR_STKOFF(r1) - confirm against
+ * the PPC_RAW_STD() operand order in <asm/ppc-opcode.h>.
+ */
+#define PPC_INST_STD_LR PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF)
+
+#endif /* _ASM_POWERPC_CODE_PATCHING_H */