aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <[email protected]>2024-07-15 08:36:13 -0700
committerLinus Torvalds <[email protected]>2024-07-15 08:36:13 -0700
commita5819099f601c1af5b86b1f5921a56859e45b19a (patch)
tree797e26cbdc5d45e97dd3cd92b27c754c8702d9ec
parent0c3836482481200ead7b416ca80c68a29cfdaabd (diff)
parent94a2bc0f611cd9fa4d26e4679bf7ea4b01b12d56 (diff)
Merge branch 'runtime-constants'
Merge runtime constants infrastructure with implementations for x86 and arm64. This is one of four branches that came out of me looking at profiles of my kernel build filesystem load on my 128-core Altra arm64 system, where pathname walking and the user copies (particularly strncpy_from_user() for fetching the pathname from user space) is very hot. This is a very specialized "instruction alternatives" model where the dentry hash pointer and hash count will be constants for the lifetime of the kernel, but the allocation are not static but done early during the kernel boot. In order to avoid the pointer load and dynamic shift, we just rewrite the constants in the instructions in place. We can't use the "generic" alternative instructions infrastructure, because different architectures do it very differently, and it's actually simpler to just have very specific helpers, with a fallback to the generic ("old") model of just using variables for architectures that do not implement the runtime constant patching infrastructure. Link: https://lore.kernel.org/all/CAHk-=widPe38fUNjUOmX11ByDckaeEo9tN4Eiyke9u1SAtu9sA@mail.gmail.com/ * runtime-constants: arm64: add 'runtime constant' support runtime constants: add x86 architecture support runtime constants: add default dummy infrastructure vfs: dcache: move hashlen_hash() from callers into d_hash()
-rw-r--r--arch/arm64/include/asm/runtime-const.h88
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S3
-rw-r--r--arch/x86/include/asm/runtime-const.h61
-rw-r--r--arch/x86/kernel/vmlinux.lds.S3
-rw-r--r--fs/dcache.c17
-rw-r--r--include/asm-generic/Kbuild1
-rw-r--r--include/asm-generic/runtime-const.h15
-rw-r--r--include/asm-generic/vmlinux.lds.h8
8 files changed, 192 insertions, 4 deletions
diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h
new file mode 100644
index 000000000000..be5915669d23
--- /dev/null
+++ b/arch/arm64/include/asm/runtime-const.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#include <asm/cacheflush.h>
+
+/* Sigh. You can still run arm64 in BE mode */
+#include <asm/byteorder.h>
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("1:\t" \
+ "movz %0, #0xcdef\n\t" \
+ "movk %0, #0x89ab, lsl #16\n\t" \
+ "movk %0, #0x4567, lsl #32\n\t" \
+ "movk %0, #0x0123, lsl #48\n\t" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret)); \
+ __ret; })
+
+#define runtime_const_shift_right_32(val, sym) ({ \
+ unsigned long __ret; \
+ asm_inline("1:\t" \
+ "lsr %w0,%w1,#12\n\t" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"r" (0u+(val))); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/* 16-bit immediate for wide move (movz and movk) in bits 5..20 */
+static inline void __runtime_fixup_16(__le32 *p, unsigned int val)
+{
+ u32 insn = le32_to_cpu(*p);
+ insn &= 0xffe0001f;
+ insn |= (val & 0xffff) << 5;
+ *p = cpu_to_le32(insn);
+}
+
+static inline void __runtime_fixup_caches(void *where, unsigned int insns)
+{
+ unsigned long va = (unsigned long)where;
+ caches_clean_inval_pou(va, va + 4*insns);
+}
+
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ __le32 *p = lm_alias(where);
+ __runtime_fixup_16(p, val);
+ __runtime_fixup_16(p+1, val >> 16);
+ __runtime_fixup_16(p+2, val >> 32);
+ __runtime_fixup_16(p+3, val >> 48);
+ __runtime_fixup_caches(where, 4);
+}
+
+/* Immediate value is 6 bits starting at bit #16 */
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ __le32 *p = lm_alias(where);
+ u32 insn = le32_to_cpu(*p);
+ insn &= 0xffc0ffff;
+ insn |= (val & 63) << 16;
+ *p = cpu_to_le32(insn);
+ __runtime_fixup_caches(where, 1);
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 755a22d4f840..55a8e310ea12 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -264,6 +264,9 @@ SECTIONS
EXIT_DATA
}
+ RUNTIME_CONST(shift, d_hash_shift)
+ RUNTIME_CONST(ptr, dentry_hashtable)
+
PERCPU_SECTION(L1_CACHE_BYTES)
HYPERVISOR_PERCPU_SECTION
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
new file mode 100644
index 000000000000..24e3a53ca255
--- /dev/null
+++ b/arch/x86/include/asm/runtime-const.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("mov %1,%0\n1:\n" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - %c2 - .\n\t" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"i" ((unsigned long)0x0123456789abcdefull), \
+ "i" (sizeof(long))); \
+ __ret; })
+
+// The 'typeof' will create at _least_ a 32-bit type, but
+// will happily also take a bigger type and the 'shrl' will
+// clear the upper bits
+#define runtime_const_shift_right_32(val, sym) ({ \
+ typeof(0u+(val)) __ret = (val); \
+ asm_inline("shrl $12,%k0\n1:\n" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - 1 - .\n\t" \
+ ".popsection" \
+ :"+r" (__ret)); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/*
+ * The text patching is trivial - you can only do this at init time,
+ * when the text section hasn't been marked RO, and before the text
+ * has ever been executed.
+ */
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ *(unsigned long *)where = val;
+}
+
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ *(unsigned char *)where = val;
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3509afc6a672..6e73403e874f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -357,6 +357,9 @@ SECTIONS
PERCPU_SECTION(INTERNODE_CACHE_BYTES)
#endif
+ RUNTIME_CONST(shift, d_hash_shift)
+ RUNTIME_CONST(ptr, dentry_hashtable)
+
. = ALIGN(PAGE_SIZE);
/* freed after init ends here */
diff --git a/fs/dcache.c b/fs/dcache.c
index 4c144519aa70..5f9ffe5c9917 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -35,6 +35,8 @@
#include "internal.h"
#include "mount.h"
+#include <asm/runtime-const.h>
+
/*
* Usage:
* dcache->d_inode->i_lock protects:
@@ -100,9 +102,10 @@ static unsigned int d_hash_shift __ro_after_init;
static struct hlist_bl_head *dentry_hashtable __ro_after_init;
-static inline struct hlist_bl_head *d_hash(unsigned int hash)
+static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
{
- return dentry_hashtable + (hash >> d_hash_shift);
+ return runtime_const_ptr(dentry_hashtable) +
+ runtime_const_shift_right_32(hashlen, d_hash_shift);
}
#define IN_LOOKUP_SHIFT 10
@@ -2110,7 +2113,7 @@ static noinline struct dentry *__d_lookup_rcu_op_compare(
unsigned *seqp)
{
u64 hashlen = name->hash_len;
- struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+ struct hlist_bl_head *b = d_hash(hashlen);
struct hlist_bl_node *node;
struct dentry *dentry;
@@ -2177,7 +2180,7 @@ struct dentry *__d_lookup_rcu(const struct dentry *parent,
{
u64 hashlen = name->hash_len;
const unsigned char *str = name->name;
- struct hlist_bl_head *b = d_hash(hashlen_hash(hashlen));
+ struct hlist_bl_head *b = d_hash(hashlen);
struct hlist_bl_node *node;
struct dentry *dentry;
@@ -3132,6 +3135,9 @@ static void __init dcache_init_early(void)
0,
0);
d_hash_shift = 32 - d_hash_shift;
+
+ runtime_const_init(shift, d_hash_shift);
+ runtime_const_init(ptr, dentry_hashtable);
}
static void __init dcache_init(void)
@@ -3160,6 +3166,9 @@ static void __init dcache_init(void)
0,
0);
d_hash_shift = 32 - d_hash_shift;
+
+ runtime_const_init(shift, d_hash_shift);
+ runtime_const_init(ptr, dentry_hashtable);
}
/* SLAB cache for __getname() consumers */
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index b20fa25a7e8d..052e5c98c105 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -46,6 +46,7 @@ mandatory-y += pci.h
mandatory-y += percpu.h
mandatory-y += pgalloc.h
mandatory-y += preempt.h
+mandatory-y += runtime-const.h
mandatory-y += rwonce.h
mandatory-y += sections.h
mandatory-y += serial.h
diff --git a/include/asm-generic/runtime-const.h b/include/asm-generic/runtime-const.h
new file mode 100644
index 000000000000..670499459514
--- /dev/null
+++ b/include/asm-generic/runtime-const.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+/*
+ * This is the fallback for when the architecture doesn't
+ * support the runtime const operations.
+ *
+ * We just use the actual symbols as-is.
+ */
+#define runtime_const_ptr(sym) (sym)
+#define runtime_const_shift_right_32(val, sym) ((u32)(val)>>(sym))
+#define runtime_const_init(type,sym) do { } while (0)
+
+#endif
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5703526d6ebf..389a78415b9b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -944,6 +944,14 @@
#define CON_INITCALL \
BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end)
+#define RUNTIME_NAME(t,x) runtime_##t##_##x
+
+#define RUNTIME_CONST(t,x) \
+ . = ALIGN(8); \
+ RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) { \
+ *(RUNTIME_NAME(t,x)); \
+ }
+
/* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */
#define KUNIT_TABLE() \
. = ALIGN(8); \