From af8d27bf15c8d68c60d830552055fcdba5b5f045 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 22 Mar 2024 14:49:36 +0100 Subject: selftests/bpf: Mark uprobe trigger functions with nocf_check attribute Some distros seem to enable -fcf-protection=branch by default, which breaks our setup on the first instruction of the uprobe trigger functions by placing an endbr64 instruction there. Mark them with the nocf_check attribute to skip that. Ignore the unknown-attribute warning in gcc for the bench objects, because nocf_check can be used only when -fcf-protection=branch is enabled; otherwise we get a warning and the compilation breaks. Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240322134936.1075395-1-jolsa@kernel.org --- tools/include/linux/compiler.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/include/linux') diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 7b65566f3e42..8a63a9913495 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -58,6 +58,10 @@ #define noinline #endif +#ifndef __nocf_check +#define __nocf_check __attribute__((nocf_check)) +#endif + /* Are two types/vars the same type (ignoring qualifiers)? */ #ifndef __same_type # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) -- cgit From 7d8296b250f2eed73f1758607926d4d258dea5d4 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 27 Mar 2024 16:23:42 +0100 Subject: bitops: make BYTES_TO_BITS() treewide-available Avoid open-coding that simple expression each time by moving BYTES_TO_BITS() from the probes code to <linux/bitops.h> to export it to the rest of the kernel. Simplify the macro while at it: `BITS_PER_LONG / sizeof(long)` always equals %BITS_PER_BYTE, regardless of the target architecture. Do the same for the tools ecosystem as well (incl. its version of bitops.h). The previous implementation had an implicit result type of long, while the new one is int, so adjust the format literal accordingly in the perf code.
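A quick stand-alone check of that simplification (plain user-space C, not part of the patch; the macro names are local to this sketch):

	#include <assert.h>
	#include <limits.h>

	#define BITS_PER_BYTE		CHAR_BIT
	#define BITS_PER_LONG		(sizeof(long) * BITS_PER_BYTE)

	/* old form from the probes code vs. the new generic one */
	#define OLD_BYTES_TO_BITS(nb)	((BITS_PER_LONG * (nb)) / sizeof(long))
	#define NEW_BYTES_TO_BITS(nb)	((nb) * BITS_PER_BYTE)

	int main(void)
	{
		for (unsigned int nb = 0; nb < 1024; nb++)
			assert(OLD_BYTES_TO_BITS(nb) == NEW_BYTES_TO_BITS(nb));
		return 0;
	}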
Suggested-by: Andy Shevchenko Reviewed-by: Przemek Kitszel Acked-by: Yury Norov Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- include/linux/bitops.h | 2 ++ kernel/trace/trace_probe.c | 2 -- tools/include/linux/bitops.h | 2 ++ tools/perf/util/probe-finder.c | 4 +--- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index f7f5a783da2a..e0cd09eb91cd 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -21,6 +21,8 @@ #define BITS_TO_U32(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) #define BITS_TO_BYTES(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(char)) +#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_BYTE) + extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index dfe3ee6035ec..87337a0c8e03 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1180,8 +1180,6 @@ parse_probe_arg(char *arg, const struct fetch_type *type, return ret; } -#define BYTES_TO_BITS(nb) ((BITS_PER_LONG * (nb)) / sizeof(long)) - /* Bitfield type needs to be parsed into a fetch function */ static int __parse_bitfield_probe_arg(const char *bf, const struct fetch_type *t, diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h index 7319f6ced108..272f15d0e434 100644 --- a/tools/include/linux/bitops.h +++ b/tools/include/linux/bitops.h @@ -20,6 +20,8 @@ #define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(u32)) #define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(char)) +#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_BYTE) + extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c8923375e30d..630e16c54ed5 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -186,8 +186,6 @@ static_var: return ret2; } -#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long)) - static int convert_variable_type(Dwarf_Die *vr_die, struct probe_trace_arg *tvar, const char *cast, bool user_access) @@ -217,7 +215,7 @@ static int convert_variable_type(Dwarf_Die *vr_die, total = dwarf_bytesize(vr_die); if (boffs < 0 || total < 0) return -ENOENT; - ret = snprintf(buf, 16, "b%d@%d/%zd", bsize, boffs, + ret = snprintf(buf, 16, "b%d@%d/%d", bsize, boffs, BYTES_TO_BITS(total)); goto formatted; } -- cgit From 10a04ff09bcc39e0044190ffe9f00f998f13647c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 27 Mar 2024 16:23:48 +0100 Subject: tools: move alignment-related macros to new <linux/align.h> Currently, tools have *ALIGN*() macros scattered across unrelated headers, as there are only 3 of them and they were added separately each time on an as-needed basis. Anyway, let's make it more consistent with the kernel headers and allow using those macros outside of the mentioned headers. Create <linux/align.h> inside the tools/ folder and include it where needed.
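For reference, a small stand-alone sketch of what the consolidated macros evaluate to (user-space C, not part of the patch; it assumes the usual uapi-style __ALIGN_KERNEL() definition and power-of-2 alignments):

	#include <assert.h>

	#define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))
	#define __ALIGN_KERNEL(x, a)		__ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1)

	#define ALIGN(x, a)		__ALIGN_KERNEL((x), (a))
	#define ALIGN_DOWN(x, a)	__ALIGN_KERNEL((x) - ((a) - 1), (a))
	#define IS_ALIGNED(x, a)	(((x) & ((__typeof__(x))(a) - 1)) == 0)

	int main(void)
	{
		assert(ALIGN(5UL, 8) == 8);		/* round up to the next multiple  */
		assert(ALIGN_DOWN(13UL, 8) == 8);	/* round down to the previous one */
		assert(IS_ALIGNED(16UL, 8));		/* 16 is a multiple of 8          */
		return 0;
	}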
Signed-off-by: Yury Norov Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- tools/include/linux/align.h | 12 ++++++++++++ tools/include/linux/bitmap.h | 2 +- tools/include/linux/mm.h | 5 +---- 3 files changed, 14 insertions(+), 5 deletions(-) create mode 100644 tools/include/linux/align.h (limited to 'tools/include/linux') diff --git a/tools/include/linux/align.h b/tools/include/linux/align.h new file mode 100644 index 000000000000..14e34ace80dd --- /dev/null +++ b/tools/include/linux/align.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _TOOLS_LINUX_ALIGN_H +#define _TOOLS_LINUX_ALIGN_H + +#include + +#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +#endif /* _TOOLS_LINUX_ALIGN_H */ diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index f3566ea0f932..8c6852dba04f 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -3,6 +3,7 @@ #define _TOOLS_LINUX_BITMAP_H #include +#include #include #include #include @@ -126,7 +127,6 @@ static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1, #define BITMAP_MEM_ALIGNMENT (8 * sizeof(unsigned long)) #endif #define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1) -#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) static inline bool bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index f3c82ab5b14c..7a6b98f4e579 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -2,8 +2,8 @@ #ifndef _TOOLS_LINUX_MM_H #define _TOOLS_LINUX_MM_H +#include #include -#include #define PAGE_SHIFT 12 #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) @@ -11,9 +11,6 @@ #define PHYS_ADDR_MAX (~(phys_addr_t)0) -#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) -#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) - #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE) #define __va(x) ((void *)((unsigned long)(x))) -- cgit From a37fbe666c016fd89e4460d0ebfcea05baba46dc Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 27 Mar 2024 16:23:49 +0100 Subject: bitmap: introduce generic optimized bitmap_size() The number of times yet another open-coded `BITS_TO_LONGS(nbits) * sizeof(long)` can be spotted is huge. Some generic helper is long overdue. Add one, bitmap_size(), but with one detail. BITS_TO_LONGS() uses DIV_ROUND_UP(). The latter works well when both dividend and divisor are compile-time constants or when the divisor is not a pow-of-2. When it is, however, the compilers sometimes tend to generate suboptimal code (GCC 13): 48 83 c0 3f add $0x3f,%rax 48 c1 e8 06 shr $0x6,%rax 48 8d 14 c5 00 00 00 00 lea 0x0(,%rax,8),%rdx %BITS_PER_LONG is always a pow-2 (either 32 or 64), but GCC still does full division of `nbits + 63` by it and then multiplication by 8. Instead of BITS_TO_LONGS(), use ALIGN() and then divide by 8. GCC: 8d 50 3f lea 0x3f(%rax),%edx c1 ea 03 shr $0x3,%edx 81 e2 f8 ff ff 1f and $0x1ffffff8,%edx Now it shifts `nbits + 63` by 3 positions (IOW performs fast division by 8) and then masks bits[2:0].
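To see that the ALIGN()-based form computes the same sizes as the open-coded expression, here is a minimal stand-alone sketch (plain user-space C, not part of the patch; ALIGN_UP and the other names are local to this example):

	#include <assert.h>
	#include <limits.h>

	#define BITS_PER_BYTE		CHAR_BIT
	#define BITS_PER_LONG		(sizeof(long) * BITS_PER_BYTE)
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
	#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_LONG)
	#define ALIGN_UP(x, a)		(((x) + (a) - 1) & ~((a) - 1))

	#define bitmap_size(nbits)	(ALIGN_UP(nbits, BITS_PER_LONG) / BITS_PER_BYTE)

	int main(void)
	{
		for (unsigned int nbits = 0; nbits < 4096; nbits++)
			assert(bitmap_size(nbits) ==
			       BITS_TO_LONGS(nbits) * sizeof(long));
		return 0;
	}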
bloat-o-meter: add/remove: 0/0 grow/shrink: 20/133 up/down: 156/-773 (-617) Clang does it better and generates the same code before/after starting from -O1, except that with the ALIGN() approach it uses %edx and thus still saves some bytes: add/remove: 0/0 grow/shrink: 9/133 up/down: 18/-538 (-520) Note that we can't expand DIV_ROUND_UP() by adding a check and using this approach there, as it's used in array declarations where expressions are not allowed. Add this helper to tools/ as well. Reviewed-by: Przemek Kitszel Acked-by: Yury Norov Signed-off-by: Alexander Lobakin Signed-off-by: David S. Miller --- drivers/md/dm-clone-metadata.c | 5 ----- drivers/s390/cio/idset.c | 2 +- include/linux/bitmap.h | 8 +++++--- include/linux/cpumask.h | 2 +- lib/math/prime_numbers.c | 2 -- tools/include/linux/bitmap.h | 7 ++++--- 6 files changed, 11 insertions(+), 15 deletions(-) (limited to 'tools/include/linux') diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c index c43d55672bce..47c1fa7aad8b 100644 --- a/drivers/md/dm-clone-metadata.c +++ b/drivers/md/dm-clone-metadata.c @@ -465,11 +465,6 @@ static void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd) /*---------------------------------------------------------------------------*/ -static size_t bitmap_size(unsigned long nr_bits) -{ - return BITS_TO_LONGS(nr_bits) * sizeof(long); -} - static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words, unsigned long nr_regions) { diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c index 0a1105a483bf..e5f28370a903 100644 --- a/drivers/s390/cio/idset.c +++ b/drivers/s390/cio/idset.c @@ -18,7 +18,7 @@ struct idset { static inline unsigned long idset_bitmap_size(int num_ssid, int num_id) { - return BITS_TO_LONGS(num_ssid * num_id) * sizeof(unsigned long); + return bitmap_size(size_mul(num_ssid, num_id)); } static struct idset *idset_new(int num_ssid, int num_id) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 914c23e96f26..363e0b184a45 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -226,9 +226,11 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig, #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) +#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) + static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { - unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + unsigned int len = bitmap_size(nbits); if (small_const_nbits(nbits)) *dst = 0; @@ -238,7 +240,7 @@ static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) { - unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + unsigned int len = bitmap_size(nbits); if (small_const_nbits(nbits)) *dst = ~0UL; @@ -249,7 +251,7 @@ static inline void bitmap_fill(unsigned long *dst, unsigned int nbits) static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits) { - unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + unsigned int len = bitmap_size(nbits); if (small_const_nbits(nbits)) *dst = *src; diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 1c29947db848..6519f9c77709 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -853,7 +853,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) 
*/ static inline unsigned int cpumask_size(void) { - return BITS_TO_LONGS(large_cpumask_bits) * sizeof(long); + return bitmap_size(large_cpumask_bits); } /* diff --git a/lib/math/prime_numbers.c b/lib/math/prime_numbers.c index d42cebf7407f..d3b64b10da1c 100644 --- a/lib/math/prime_numbers.c +++ b/lib/math/prime_numbers.c @@ -6,8 +6,6 @@ #include #include -#define bitmap_size(nbits) (BITS_TO_LONGS(nbits) * sizeof(unsigned long)) - struct primes { struct rcu_head rcu; unsigned long last, sz; diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 8c6852dba04f..210c13b1b857 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -26,13 +26,14 @@ bool __bitmap_intersects(const unsigned long *bitmap1, #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) +#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE) + static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = 0UL; else { - int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); - memset(dst, 0, len); + memset(dst, 0, bitmap_size(nbits)); } } @@ -84,7 +85,7 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, */ static inline unsigned long *bitmap_zalloc(int nbits) { - return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long)); + return calloc(1, bitmap_size(nbits)); } /* -- cgit From 343ca8131c35ba132d200fd9752b60e65357924d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 4 Apr 2024 14:45:36 -0700 Subject: selftests/bpf: add fp-leaking precise subprog result tests Add selftests validating that BPF verifier handles precision marking for SCALAR registers derived from r10 (fp) register correctly. Given `r0 = (s8)r10;` syntax is not supported by older Clang compilers, use the raw BPF instruction syntax to maximize compatibility. 
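For reference, the raw instruction used here is just the sign-extending move spelled out; a sketch of what BPF_MOVSX64_REG(BPF_REG_0, BPF_REG_10, 8), added to tools/include/linux/filter.h below, expands to (using the uapi linux/bpf.h definitions):

	#include <linux/bpf.h>

	/* dst = r0, src = r10; off = 8 selects the sign-extending (s8) form */
	struct bpf_insn insn = {
		.code    = BPF_ALU64 | BPF_MOV | BPF_X,
		.dst_reg = BPF_REG_0,
		.src_reg = BPF_REG_10,
		.off     = 8,
		.imm     = 0,
	};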
Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20240404214536.3551295-2-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/include/linux/filter.h | 18 +++++ .../bpf/progs/verifier_subprog_precision.c | 89 ++++++++++++++++++++++ 2 files changed, 107 insertions(+) (limited to 'tools/include/linux') diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h index 736bdeccdfe4..65aa8ce142e5 100644 --- a/tools/include/linux/filter.h +++ b/tools/include/linux/filter.h @@ -111,6 +111,24 @@ .off = 0, \ .imm = IMM }) +/* Short form of movsx, dst_reg = (s8,s16,s32)src_reg */ + +#define BPF_MOVSX64_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +#define BPF_MOVSX32_REG(DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index 6f5d19665cf6..4a58e0398e72 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -6,6 +6,7 @@ #include #include #include "bpf_misc.h" +#include <../../../tools/include/linux/filter.h> #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) @@ -76,6 +77,94 @@ __naked int subprog_result_precise(void) ); } +__naked __noinline __used +static unsigned long fp_leaking_subprog() +{ + asm volatile ( + ".8byte %[r0_eq_r10_cast_s8];" + "exit;" + :: __imm_insn(r0_eq_r10_cast_s8, BPF_MOVSX64_REG(BPF_REG_0, BPF_REG_10, 8)) + ); +} + +__naked __noinline __used +static unsigned long sneaky_fp_leaking_subprog() +{ + asm volatile ( + "r1 = r10;" + ".8byte %[r0_eq_r1_cast_s8];" + "exit;" + :: __imm_insn(r0_eq_r1_cast_s8, BPF_MOVSX64_REG(BPF_REG_0, BPF_REG_1, 8)) + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("6: (0f) r1 += r0") +__msg("mark_precise: frame0: last_idx 6 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4") +__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3") +__msg("mark_precise: frame0: regs=r0 stack= before 10: (95) exit") +__msg("mark_precise: frame1: regs=r0 stack= before 9: (bf) r0 = (s8)r10") +__msg("7: R0_w=scalar") +__naked int fp_precise_subprog_result(void) +{ + asm volatile ( + "call fp_leaking_subprog;" + /* use subprog's returned value (which is derived from r10=fp + * register), as index into vals array, forcing all of that to + * be known precisely + */ + "r0 &= 3;" + "r0 *= 4;" + "r1 = %[vals];" + /* force precision marking */ + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("6: (0f) r1 += r0") +__msg("mark_precise: frame0: last_idx 6 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4") +__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3") +__msg("mark_precise: frame0: regs=r0 stack= before 11: (95) exit") +__msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) 
r0 = (s8)r1") +/* here r1 is marked precise, even though it's fp register, but that's fine + * because by the time we get out of subprogram it has to be derived from r10 + * anyways, at which point we'll break precision chain + */ +__msg("mark_precise: frame1: regs=r1 stack= before 9: (bf) r1 = r10") +__msg("7: R0_w=scalar") +__naked int sneaky_fp_precise_subprog_result(void) +{ + asm volatile ( + "call sneaky_fp_leaking_subprog;" + /* use subprog's returned value (which is derived from r10=fp + * register), as index into vals array, forcing all of that to + * be known precisely + */ + "r0 &= 3;" + "r0 *= 4;" + "r1 = %[vals];" + /* force precision marking */ + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common + ); +} + SEC("?raw_tp") __success __log_level(2) __msg("9: (0f) r1 += r0") -- cgit From cd88c11c6d89bcd1851736d853eca57bbde1b042 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Apr 2024 17:23:27 -0300 Subject: tools lib rbtree: Pick some improvements from the kernel rbtree code The tools/lib/rbtree.c code came from the kernel, removing the EXPORT_SYMBOL() calls that make sense only there. Unfortunately it is not being checked with tools/perf/check_headers.sh; will try to remedy this. Till then, pick the improvements from: b0687c1119b4e8c8 ("lib/rbtree: use '+' instead of '|' for setting color.") That I noticed by doing: diff -u tools/lib/rbtree.c lib/rbtree.c diff -u tools/include/linux/rbtree_augmented.h include/linux/rbtree_augmented.h There is one other case, but let's pick it in a separate patch. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Andrew Morton Cc: Noah Goldstein Link: https://lore.kernel.org/lkml/ZigZzeFoukzRKG1Q@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/rbtree_augmented.h | 4 ++-- tools/lib/rbtree.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/include/linux') diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h index 570bb9794421..95483c7d81df 100644 --- a/tools/include/linux/rbtree_augmented.h +++ b/tools/include/linux/rbtree_augmented.h @@ -158,13 +158,13 @@ RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \ static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) { - rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; + rb->__rb_parent_color = rb_color(rb) + (unsigned long)p; } static inline void rb_set_parent_color(struct rb_node *rb, struct rb_node *p, int color) { - rb->__rb_parent_color = (unsigned long)p | color; + rb->__rb_parent_color = (unsigned long)p + color; } static inline void diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c index 727396de6be5..9e7307186b7f 100644 --- a/tools/lib/rbtree.c +++ b/tools/lib/rbtree.c @@ -58,7 +58,7 @@ static inline void rb_set_black(struct rb_node *rb) { - rb->__rb_parent_color |= RB_BLACK; + rb->__rb_parent_color += RB_BLACK; } static inline struct rb_node *rb_red_parent(struct rb_node *red) -- cgit From 450f941ea9dce7256a485baf36f2b8d85a64e1c0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 26 Apr 2024 15:52:09 -0300 Subject: tools headers: Synchronize linux/bits.h with the kernel sources To pick up the changes in this cset: 3c7a8e190bc58081 ("uapi: introduce uapi-friendly macros for GENMASK") That just causes perf to rebuild. It's just some macros going to an uapi header, of which we now have to grab a copy into tools/ as well.
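As a refresher on what those macros compute, a stand-alone sketch (user-space C, assuming a 64-bit long; __GENMASK() mirrors the uapi definition added below, _UL() is a simplified local stand-in for the uapi const.h helper):

	#include <assert.h>

	#define __BITS_PER_LONG	64			/* assumption for this sketch */
	#define _UL(x)		((unsigned long)(x))

	#define __GENMASK(h, l) \
		(((~_UL(0)) - (_UL(1) << (l)) + 1) & \
		 (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))

	int main(void)
	{
		assert(__GENMASK(7, 4) == 0xf0UL);	/* bits 4..7 set */
		assert(__GENMASK(0, 0) == 0x1UL);	/* a single bit  */
		assert(__GENMASK(63, 0) == ~0UL);	/* the full word */
		return 0;
	}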
This addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/linux/bits.h include/linux/bits.h Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Paolo Bonzini Link: https://lore.kernel.org/lkml/ZiwJsFOBez0MS4r9@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/bits.h | 8 +------- tools/include/uapi/asm-generic/bitsperlong.h | 4 ++++ tools/include/uapi/linux/bits.h | 15 +++++++++++++++ tools/perf/check-headers.sh | 1 + 4 files changed, 21 insertions(+), 7 deletions(-) create mode 100644 tools/include/uapi/linux/bits.h (limited to 'tools/include/linux') diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 7c0cf5031abe..0eb24d21aac2 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -4,6 +4,7 @@ #include #include +#include #include #define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG)) @@ -30,15 +31,8 @@ #define GENMASK_INPUT_CHECK(h, l) 0 #endif -#define __GENMASK(h, l) \ - (((~UL(0)) - (UL(1) << (l)) + 1) & \ - (~UL(0) >> (BITS_PER_LONG - 1 - (h)))) #define GENMASK(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) - -#define __GENMASK_ULL(h, l) \ - (((~ULL(0)) - (ULL(1) << (l)) + 1) & \ - (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) diff --git a/tools/include/uapi/asm-generic/bitsperlong.h b/tools/include/uapi/asm-generic/bitsperlong.h index 352cb81947b8..fadb3f857f28 100644 --- a/tools/include/uapi/asm-generic/bitsperlong.h +++ b/tools/include/uapi/asm-generic/bitsperlong.h @@ -24,4 +24,8 @@ #endif #endif +#ifndef __BITS_PER_LONG_LONG +#define __BITS_PER_LONG_LONG 64 +#endif + #endif /* _UAPI__ASM_GENERIC_BITS_PER_LONG */ diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h new file mode 100644 index 000000000000..3c2a101986a3 --- /dev/null +++ b/tools/include/uapi/linux/bits.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* bits.h: Macros for dealing with bitmasks. */ + +#ifndef _UAPI_LINUX_BITS_H +#define _UAPI_LINUX_BITS_H + +#define __GENMASK(h, l) \ + (((~_UL(0)) - (_UL(1) << (l)) + 1) & \ + (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) + +#define __GENMASK_ULL(h, l) \ + (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ + (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) + +#endif /* _UAPI_LINUX_BITS_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 9c84fd049070..672421b858ac 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -9,6 +9,7 @@ FILES=( "include/uapi/linux/const.h" "include/uapi/drm/drm.h" "include/uapi/drm/i915_drm.h" + "include/uapi/linux/bits.h" "include/uapi/linux/fadvise.h" "include/uapi/linux/fscrypt.h" "include/uapi/linux/kcmp.h" -- cgit From 9c313ccdfc079f71f16c9b3d3a6c6d60996b9367 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 21 Apr 2024 00:38:37 +0200 Subject: bitops: Change function return types from long to int Change the return types of bitops functions (ffs, fls, and fns) from long to int. The expected return values are in the range [0, 64], for which int is sufficient. Additionally, int aligns well with the return types of the corresponding __builtin_* functions, potentially reducing overall type conversions. Many of the existing bitops functions already return an int and don't need to be changed. The bitops functions in arch/ should be considered separately. Adjust some return variables to match the function return types. 
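For illustration, a stand-alone sketch of the builtin-backed variants with the narrower return type (user-space C, assuming GCC/Clang builtins; my__ffs/my__fls are local names, not the kernel symbols):

	#include <assert.h>
	#include <limits.h>

	static inline unsigned int my__ffs(unsigned long word)	/* word must be non-zero */
	{
		return __builtin_ctzl(word);
	}

	static inline unsigned int my__fls(unsigned long word)	/* word must be non-zero */
	{
		return sizeof(word) * CHAR_BIT - 1 - __builtin_clzl(word);
	}

	int main(void)
	{
		assert(my__ffs(0x10UL) == 4);
		assert(my__fls(~0UL) == sizeof(long) * CHAR_BIT - 1);
		return 0;
	}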
With GCC 13 and defconfig, these changes reduced the size of a test kernel image by 5,432 bytes on arm64 and by 248 bytes on riscv; there were no changes in size on x86_64, powerpc, or m68k. Signed-off-by: Thorsten Blum Signed-off-by: Arnd Bergmann --- include/asm-generic/bitops/__ffs.h | 4 ++-- include/asm-generic/bitops/__fls.h | 4 ++-- include/asm-generic/bitops/builtin-__ffs.h | 2 +- include/asm-generic/bitops/builtin-__fls.h | 2 +- include/linux/bitops.h | 6 +++--- tools/include/asm-generic/bitops/__ffs.h | 4 ++-- tools/include/asm-generic/bitops/__fls.h | 4 ++-- tools/include/linux/bitops.h | 2 +- 8 files changed, 14 insertions(+), 14 deletions(-) (limited to 'tools/include/linux') diff --git a/include/asm-generic/bitops/__ffs.h b/include/asm-generic/bitops/__ffs.h index 446fea6dda78..2d08c750c8a7 100644 --- a/include/asm-generic/bitops/__ffs.h +++ b/include/asm-generic/bitops/__ffs.h @@ -10,9 +10,9 @@ * * Undefined if no bit exists, so code should check against 0 first. */ -static __always_inline unsigned long generic___ffs(unsigned long word) +static __always_inline unsigned int generic___ffs(unsigned long word) { - int num = 0; + unsigned int num = 0; #if BITS_PER_LONG == 64 if ((word & 0xffffffff) == 0) { diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h index 54ccccf96e21..e974ec932ec1 100644 --- a/include/asm-generic/bitops/__fls.h +++ b/include/asm-generic/bitops/__fls.h @@ -10,9 +10,9 @@ * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned long generic___fls(unsigned long word) +static __always_inline unsigned int generic___fls(unsigned long word) { - int num = BITS_PER_LONG - 1; + unsigned int num = BITS_PER_LONG - 1; #if BITS_PER_LONG == 64 if (!(word & (~0ul << 32))) { diff --git a/include/asm-generic/bitops/builtin-__ffs.h b/include/asm-generic/bitops/builtin-__ffs.h index 87024da44d10..cf4b3d33bf96 100644 --- a/include/asm-generic/bitops/builtin-__ffs.h +++ b/include/asm-generic/bitops/builtin-__ffs.h @@ -8,7 +8,7 @@ * * Undefined if no bit exists, so code should check against 0 first. */ -static __always_inline unsigned long __ffs(unsigned long word) +static __always_inline unsigned int __ffs(unsigned long word) { return __builtin_ctzl(word); } diff --git a/include/asm-generic/bitops/builtin-__fls.h b/include/asm-generic/bitops/builtin-__fls.h index 43a5aa9afbdb..6d72fc8a5259 100644 --- a/include/asm-generic/bitops/builtin-__fls.h +++ b/include/asm-generic/bitops/builtin-__fls.h @@ -8,7 +8,7 @@ * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned long __fls(unsigned long word) +static __always_inline unsigned int __fls(unsigned long word) { return (sizeof(word) * 8) - 1 - __builtin_clzl(word); } diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 2ba557e067fe..f60220f119e2 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -200,7 +200,7 @@ static __always_inline __s64 sign_extend64(__u64 value, int index) return (__s64)(value << shift) >> shift; } -static inline unsigned fls_long(unsigned long l) +static inline unsigned int fls_long(unsigned long l) { if (sizeof(l) == 4) return fls(l); @@ -236,7 +236,7 @@ static inline int get_count_order_long(unsigned long l) * The result is not defined if no bits are set, so check that @word * is non-zero before calling this. 
*/ -static inline unsigned long __ffs64(u64 word) +static inline unsigned int __ffs64(u64 word) { #if BITS_PER_LONG == 32 if (((u32)word) == 0UL) @@ -252,7 +252,7 @@ static inline unsigned long __ffs64(u64 word) * @word: The word to search * @n: Bit to find */ -static inline unsigned long fns(unsigned long word, unsigned int n) +static inline unsigned int fns(unsigned long word, unsigned int n) { unsigned int bit; diff --git a/tools/include/asm-generic/bitops/__ffs.h b/tools/include/asm-generic/bitops/__ffs.h index 9d1310519497..2d94c1e9b2f3 100644 --- a/tools/include/asm-generic/bitops/__ffs.h +++ b/tools/include/asm-generic/bitops/__ffs.h @@ -11,9 +11,9 @@ * * Undefined if no bit exists, so code should check against 0 first. */ -static __always_inline unsigned long __ffs(unsigned long word) +static __always_inline unsigned int __ffs(unsigned long word) { - int num = 0; + unsigned int num = 0; #if __BITS_PER_LONG == 64 if ((word & 0xffffffff) == 0) { diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h index 54ccccf96e21..e974ec932ec1 100644 --- a/tools/include/asm-generic/bitops/__fls.h +++ b/tools/include/asm-generic/bitops/__fls.h @@ -10,9 +10,9 @@ * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned long generic___fls(unsigned long word) +static __always_inline unsigned int generic___fls(unsigned long word) { - int num = BITS_PER_LONG - 1; + unsigned int num = BITS_PER_LONG - 1; #if BITS_PER_LONG == 64 if (!(word & (~0ul << 32))) { diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h index 7319f6ced108..8e073a111d2a 100644 --- a/tools/include/linux/bitops.h +++ b/tools/include/linux/bitops.h @@ -70,7 +70,7 @@ static inline unsigned long hweight_long(unsigned long w) return sizeof(w) == 4 ? hweight32(w) : hweight64(w); } -static inline unsigned fls_long(unsigned long l) +static inline unsigned int fls_long(unsigned long l) { if (sizeof(l) == 4) return fls(l); -- cgit From bbed8b9ffed16572357c5a348c7172846d106cfe Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Apr 2024 17:27:57 -0300 Subject: tools lib rbtree: pick some improvements from the kernel rbtree code The tools/lib/rbtree.c code came from the kernel. Remove the EXPORT_SYMBOL() calls that make sense only there. Unfortunately it is not being checked with tools/perf/check_headers.sh. Will try to remedy this. Until then pick the improvements from: b0687c1119b4e8c8 ("lib/rbtree: use '+' instead of '|' for setting color.") That I noticed by doing: diff -u tools/lib/rbtree.c lib/rbtree.c diff -u tools/include/linux/rbtree_augmented.h include/linux/rbtree_augmented.h There is one other case, but let's pick it in a separate patch.
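Why '+' and '|' are interchangeable here: the parent pointer stored in __rb_parent_color is at least 4-byte aligned, so its low bits are zero and adding the color bit can never carry into the pointer bits. A tiny stand-alone sketch of that argument (user-space C, not the rbtree code itself):

	#include <assert.h>
	#include <stdint.h>

	int main(void)
	{
		long parent;				/* stand-in for a suitably aligned rb_node */
		uintptr_t p = (uintptr_t)&parent;	/* low two bits are zero */
		unsigned long color = 1;		/* RB_BLACK */

		assert((p & 0x3) == 0);			/* the alignment assumption */
		assert((p | color) == (p + color));	/* hence '|' and '+' agree  */
		return 0;
	}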
Link: https://lkml.kernel.org/r/ZigZzeFoukzRKG1Q@x1 Signed-off-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Noah Goldstein Signed-off-by: Andrew Morton --- tools/include/linux/rbtree_augmented.h | 4 ++-- tools/lib/rbtree.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/include/linux') diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h index 570bb9794421..95483c7d81df 100644 --- a/tools/include/linux/rbtree_augmented.h +++ b/tools/include/linux/rbtree_augmented.h @@ -158,13 +158,13 @@ RB_DECLARE_CALLBACKS(RBSTATIC, RBNAME, \ static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) { - rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; + rb->__rb_parent_color = rb_color(rb) + (unsigned long)p; } static inline void rb_set_parent_color(struct rb_node *rb, struct rb_node *p, int color) { - rb->__rb_parent_color = (unsigned long)p | color; + rb->__rb_parent_color = (unsigned long)p + color; } static inline void diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c index 727396de6be5..9e7307186b7f 100644 --- a/tools/lib/rbtree.c +++ b/tools/lib/rbtree.c @@ -58,7 +58,7 @@ static inline void rb_set_black(struct rb_node *rb) { - rb->__rb_parent_color |= RB_BLACK; + rb->__rb_parent_color += RB_BLACK; } static inline struct rb_node *rb_red_parent(struct rb_node *red) -- cgit