From b558c96ffa53f4b3dd52b774e4fb7a52982ab52b Mon Sep 17 00:00:00 2001 From: Jim Cromie Date: Mon, 19 Dec 2011 17:11:18 -0500 Subject: dynamic_debug: make dynamic-debug supersede DEBUG ccflag If CONFIG_DYNAMIC_DEBUG is defined, honor it over DEBUG, so that pr_debug()s are controllable, instead of always-on. When DEBUG is also defined, change _DPRINTK_FLAGS_DEFAULT to enable printing by default. Also adding _DPRINTK_FLAGS_INCL_MODNAME would be nice, but there are numerous cases of pr_debug(NAME ": ...), which would result in double printing of module-name. So defer this until things settle. Cc: David Miller Cc: Joe Perches Signed-off-by: Jim Cromie Signed-off-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0eac07c95255..f486f635e7b5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2687,14 +2687,14 @@ int netdev_info(const struct net_device *dev, const char *format, ...); #define MODULE_ALIAS_NETDEV(device) \ MODULE_ALIAS("netdev-" device) -#if defined(DEBUG) -#define netdev_dbg(__dev, format, args...) \ - netdev_printk(KERN_DEBUG, __dev, format, ##args) -#elif defined(CONFIG_DYNAMIC_DEBUG) +#if defined(CONFIG_DYNAMIC_DEBUG) #define netdev_dbg(__dev, format, args...) \ do { \ dynamic_netdev_dbg(__dev, format, ##args); \ } while (0) +#elif defined(DEBUG) +#define netdev_dbg(__dev, format, args...) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args) #else #define netdev_dbg(__dev, format, args...) \ ({ \ -- cgit From c5905afb0ee6550b42c49213da1c22d67316c194 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Feb 2012 08:31:31 +0100 Subject: static keys: Introduce 'struct static_key', static_key_true()/false() and static_key_slow_[inc|dec]() So here's a boot tested patch on top of Jason's series that does all the cleanups I talked about and turns jump labels into a more intuitive to use facility. It should also address the various misconceptions and confusions that surround jump labels. Typical usage scenarios: #include struct static_key key = STATIC_KEY_INIT_TRUE; if (static_key_false(&key)) do unlikely code else do likely code Or: if (static_key_true(&key)) do likely code else do unlikely code The static key is modified via: static_key_slow_inc(&key); ... static_key_slow_dec(&key); The 'slow' prefix makes it abundantly clear that this is an expensive operation. I've updated all in-kernel code to use this everywhere. Note that I (intentionally) have not pushed through the rename blindly through to the lowest levels: the actual jump-label patching arch facility should be named like that, so we want to decouple jump labels from the static-key facility a bit. On non-jump-label enabled architectures static keys default to likely()/unlikely() branches. Signed-off-by: Ingo Molnar Acked-by: Jason Baron Acked-by: Steven Rostedt Cc: a.p.zijlstra@chello.nl Cc: mathieu.desnoyers@efficios.com Cc: davem@davemloft.net Cc: ddaney.cavm@gmail.com Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20120222085809.GA26397@elte.hu Signed-off-by: Ingo Molnar --- arch/Kconfig | 29 ++++--- arch/ia64/include/asm/paravirt.h | 6 +- arch/ia64/kernel/paravirt.c | 4 +- arch/mips/include/asm/jump_label.h | 2 +- arch/powerpc/include/asm/jump_label.h | 2 +- arch/s390/include/asm/jump_label.h | 2 +- arch/sparc/include/asm/jump_label.h | 2 +- arch/x86/include/asm/jump_label.h | 6 +- arch/x86/include/asm/paravirt.h | 6 +- arch/x86/kernel/kvm.c | 4 +- arch/x86/kernel/paravirt.c | 4 +- arch/x86/kvm/mmu_audit.c | 8 +- include/linux/jump_label.h | 139 ++++++++++++++++++++++++---------- include/linux/netdevice.h | 4 +- include/linux/netfilter.h | 6 +- include/linux/perf_event.h | 12 +-- include/linux/static_key.h | 1 + include/linux/tracepoint.h | 8 +- include/net/sock.h | 6 +- kernel/events/core.c | 16 ++-- kernel/jump_label.c | 128 ++++++++++++++++++------------- kernel/sched/core.c | 18 ++--- kernel/sched/fair.c | 8 +- kernel/sched/sched.h | 14 ++-- kernel/tracepoint.c | 20 ++--- net/core/dev.c | 24 +++--- net/core/net-sysfs.c | 4 +- net/core/sock.c | 4 +- net/core/sysctl_net_core.c | 4 +- net/ipv4/tcp_memcontrol.c | 6 +- net/netfilter/core.c | 6 +- 31 files changed, 298 insertions(+), 205 deletions(-) create mode 100644 include/linux/static_key.h (limited to 'include/linux/netdevice.h') diff --git a/arch/Kconfig b/arch/Kconfig index 4f55c736be11..5b448a74d0f7 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -47,18 +47,29 @@ config KPROBES If in doubt, say "N". config JUMP_LABEL - bool "Optimize trace point call sites" + bool "Optimize very unlikely/likely branches" depends on HAVE_ARCH_JUMP_LABEL help + This option enables a transparent branch optimization that + makes certain almost-always-true or almost-always-false branch + conditions even cheaper to execute within the kernel. + + Certain performance-sensitive kernel code, such as trace points, + scheduler functionality, networking code and KVM have such + branches and include support for this optimization technique. + If it is detected that the compiler has support for "asm goto", - the kernel will compile trace point locations with just a - nop instruction. When trace points are enabled, the nop will - be converted to a jump to the trace function. This technique - lowers overhead and stress on the branch prediction of the - processor. - - On i386, options added to the compiler flags may increase - the size of the kernel slightly. + the kernel will compile such branches with just a nop + instruction. When the condition flag is toggled to true, the + nop will be converted to a jump instruction to execute the + conditional block of instructions. + + This technique lowers overhead and stress on the branch prediction + of the processor and generally makes the kernel faster. The update + of the condition is slower, but those are always very rare. + + ( On 32-bit x86, the necessary options added to the compiler + flags may increase the size of the kernel slightly. ) config OPTPROBES def_bool y diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h index 32551d304cd7..b149b88ea795 100644 --- a/arch/ia64/include/asm/paravirt.h +++ b/arch/ia64/include/asm/paravirt.h @@ -281,9 +281,9 @@ paravirt_init_missing_ticks_accounting(int cpu) pv_time_ops.init_missing_ticks_accounting(cpu); } -struct jump_label_key; -extern struct jump_label_key paravirt_steal_enabled; -extern struct jump_label_key paravirt_steal_rq_enabled; +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; static inline int paravirt_do_steal_accounting(unsigned long *new_itm) diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index 100868216c55..1b22f6de2932 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -634,8 +634,8 @@ struct pv_irq_ops pv_irq_ops = { * pv_time_ops * time operations */ -struct jump_label_key paravirt_steal_enabled; -struct jump_label_key paravirt_steal_rq_enabled; +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; static int ia64_native_do_steal_accounting(unsigned long *new_itm) diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 1881b316ca45..4d6d77ed9b9d 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -20,7 +20,7 @@ #define WORD_INSN ".word" #endif -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:\tnop\n\t" "nop\n\t" diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 938986e412f1..ae098c438f00 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -17,7 +17,7 @@ #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:\n\t" "nop\n\t" diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 95a6cf2b5b67..6c32190dc73e 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -13,7 +13,7 @@ #define ASM_ALIGN ".balign 4" #endif -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("0: brcl 0,0\n" ".pushsection __jump_table, \"aw\"\n" diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index fc73a82366f8..5080d16a832f 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -7,7 +7,7 @@ #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:\n\t" "nop\n\t" diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index a32b18ce6ead..3a16c1483b45 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -9,12 +9,12 @@ #define JUMP_LABEL_NOP_SIZE 5 -#define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" +#define STATIC_KEY_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" -static __always_inline bool arch_static_branch(struct jump_label_key *key) +static __always_inline bool arch_static_branch(struct static_key *key) { asm goto("1:" - JUMP_LABEL_INITIAL_NOP + STATIC_KEY_INITIAL_NOP ".pushsection __jump_table, \"aw\" \n\t" _ASM_ALIGN "\n\t" _ASM_PTR "1b, %l[l_yes], %c0 \n\t" diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index a7d2db9a74fb..c0180fd372d2 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -230,9 +230,9 @@ static inline unsigned long long paravirt_sched_clock(void) return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); } -struct jump_label_key; -extern struct jump_label_key paravirt_steal_enabled; -extern struct jump_label_key paravirt_steal_rq_enabled; +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; static inline u64 paravirt_steal_clock(int cpu) { diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6f176b..694d801bf606 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -438,9 +438,9 @@ void __init kvm_guest_init(void) static __init int activate_jump_labels(void) { if (has_steal_clock) { - jump_label_inc(¶virt_steal_enabled); + static_key_slow_inc(¶virt_steal_enabled); if (steal_acc) - jump_label_inc(¶virt_steal_rq_enabled); + static_key_slow_inc(¶virt_steal_rq_enabled); } return 0; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index d90272e6bc40..ada2f99388dd 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -202,8 +202,8 @@ static void native_flush_tlb_single(unsigned long addr) __native_flush_tlb_single(addr); } -struct jump_label_key paravirt_steal_enabled; -struct jump_label_key paravirt_steal_rq_enabled; +struct static_key paravirt_steal_enabled; +struct static_key paravirt_steal_rq_enabled; static u64 native_steal_clock(int cpu) { diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index fe15dcc07a6b..ea7b4fd34676 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -234,7 +234,7 @@ static void audit_vcpu_spte(struct kvm_vcpu *vcpu) } static bool mmu_audit; -static struct jump_label_key mmu_audit_key; +static struct static_key mmu_audit_key; static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { @@ -250,7 +250,7 @@ static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point) { - if (static_branch((&mmu_audit_key))) + if (static_key_false((&mmu_audit_key))) __kvm_mmu_audit(vcpu, point); } @@ -259,7 +259,7 @@ static void mmu_audit_enable(void) if (mmu_audit) return; - jump_label_inc(&mmu_audit_key); + static_key_slow_inc(&mmu_audit_key); mmu_audit = true; } @@ -268,7 +268,7 @@ static void mmu_audit_disable(void) if (!mmu_audit) return; - jump_label_dec(&mmu_audit_key); + static_key_slow_dec(&mmu_audit_key); mmu_audit = false; } diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index f7c69580fea7..2172da2d9bb4 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -9,15 +9,15 @@ * * Jump labels provide an interface to generate dynamic branches using * self-modifying code. Assuming toolchain and architecture support the result - * of a "if (static_branch(&key))" statement is a unconditional branch (which + * of a "if (static_key_false(&key))" statement is a unconditional branch (which * defaults to false - and the true block is placed out of line). * - * However at runtime we can change the 'static' branch target using - * jump_label_{inc,dec}(). These function as a 'reference' count on the key + * However at runtime we can change the branch target using + * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key * object and for as long as there are references all branches referring to * that particular key will point to the (out of line) true block. * - * Since this relies on modifying code the jump_label_{inc,dec}() functions + * Since this relies on modifying code the static_key_slow_{inc,dec}() functions * must be considered absolute slow paths (machine wide synchronization etc.). * OTOH, since the affected branches are unconditional their runtime overhead * will be absolutely minimal, esp. in the default (off) case where the total @@ -26,12 +26,26 @@ * * When the control is directly exposed to userspace it is prudent to delay the * decrement to avoid high frequency code modifications which can (and do) - * cause significant performance degradation. Struct jump_label_key_deferred and - * jump_label_dec_deferred() provide for this. + * cause significant performance degradation. Struct static_key_deferred and + * static_key_slow_dec_deferred() provide for this. * * Lacking toolchain and or architecture support, it falls back to a simple * conditional branch. - */ + * + * struct static_key my_key = STATIC_KEY_INIT_TRUE; + * + * if (static_key_true(&my_key)) { + * } + * + * will result in the true case being in-line and starts the key with a single + * reference. Mixing static_key_true() and static_key_false() on the same key is not + * allowed. + * + * Not initializing the key (static data is initialized to 0s anyway) is the + * same as using STATIC_KEY_INIT_FALSE and static_key_false() is + * equivalent with static_branch(). + * +*/ #include #include @@ -39,16 +53,17 @@ #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) -struct jump_label_key { +struct static_key { atomic_t enabled; +/* Set lsb bit to 1 if branch is default true, 0 ot */ struct jump_entry *entries; #ifdef CONFIG_MODULES - struct jump_label_mod *next; + struct static_key_mod *next; #endif }; -struct jump_label_key_deferred { - struct jump_label_key key; +struct static_key_deferred { + struct static_key key; unsigned long timeout; struct delayed_work work; }; @@ -66,13 +81,34 @@ struct module; #ifdef HAVE_JUMP_LABEL -#ifdef CONFIG_MODULES -#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL, NULL} -#else -#define JUMP_LABEL_INIT {ATOMIC_INIT(0), NULL} -#endif +#define JUMP_LABEL_TRUE_BRANCH 1UL + +static +inline struct jump_entry *jump_label_get_entries(struct static_key *key) +{ + return (struct jump_entry *)((unsigned long)key->entries + & ~JUMP_LABEL_TRUE_BRANCH); +} + +static inline bool jump_label_get_branch_default(struct static_key *key) +{ + if ((unsigned long)key->entries & JUMP_LABEL_TRUE_BRANCH) + return true; + return false; +} + +static __always_inline bool static_key_false(struct static_key *key) +{ + return arch_static_branch(key); +} -static __always_inline bool static_branch(struct jump_label_key *key) +static __always_inline bool static_key_true(struct static_key *key) +{ + return !static_key_false(key); +} + +/* Deprecated. Please use 'static_key_false() instead. */ +static __always_inline bool static_branch(struct static_key *key) { return arch_static_branch(key); } @@ -88,21 +124,24 @@ extern void arch_jump_label_transform(struct jump_entry *entry, extern void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type); extern int jump_label_text_reserved(void *start, void *end); -extern void jump_label_inc(struct jump_label_key *key); -extern void jump_label_dec(struct jump_label_key *key); -extern void jump_label_dec_deferred(struct jump_label_key_deferred *key); -extern bool jump_label_enabled(struct jump_label_key *key); +extern void static_key_slow_inc(struct static_key *key); +extern void static_key_slow_dec(struct static_key *key); +extern void static_key_slow_dec_deferred(struct static_key_deferred *key); +extern bool static_key_enabled(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); -extern void jump_label_rate_limit(struct jump_label_key_deferred *key, - unsigned long rl); +extern void +jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); + +#define STATIC_KEY_INIT_TRUE ((struct static_key) \ + { .enabled = ATOMIC_INIT(1), .entries = (void *)1 }) +#define STATIC_KEY_INIT_FALSE ((struct static_key) \ + { .enabled = ATOMIC_INIT(0), .entries = (void *)0 }) #else /* !HAVE_JUMP_LABEL */ #include -#define JUMP_LABEL_INIT {ATOMIC_INIT(0)} - -struct jump_label_key { +struct static_key { atomic_t enabled; }; @@ -110,30 +149,45 @@ static __always_inline void jump_label_init(void) { } -struct jump_label_key_deferred { - struct jump_label_key key; +struct static_key_deferred { + struct static_key key; }; -static __always_inline bool static_branch(struct jump_label_key *key) +static __always_inline bool static_key_false(struct static_key *key) +{ + if (unlikely(atomic_read(&key->enabled)) > 0) + return true; + return false; +} + +static __always_inline bool static_key_true(struct static_key *key) { - if (unlikely(atomic_read(&key->enabled))) + if (likely(atomic_read(&key->enabled)) > 0) return true; return false; } -static inline void jump_label_inc(struct jump_label_key *key) +/* Deprecated. Please use 'static_key_false() instead. */ +static __always_inline bool static_branch(struct static_key *key) +{ + if (unlikely(atomic_read(&key->enabled)) > 0) + return true; + return false; +} + +static inline void static_key_slow_inc(struct static_key *key) { atomic_inc(&key->enabled); } -static inline void jump_label_dec(struct jump_label_key *key) +static inline void static_key_slow_dec(struct static_key *key) { atomic_dec(&key->enabled); } -static inline void jump_label_dec_deferred(struct jump_label_key_deferred *key) +static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) { - jump_label_dec(&key->key); + static_key_slow_dec(&key->key); } static inline int jump_label_text_reserved(void *start, void *end) @@ -144,9 +198,9 @@ static inline int jump_label_text_reserved(void *start, void *end) static inline void jump_label_lock(void) {} static inline void jump_label_unlock(void) {} -static inline bool jump_label_enabled(struct jump_label_key *key) +static inline bool static_key_enabled(struct static_key *key) { - return !!atomic_read(&key->enabled); + return (atomic_read(&key->enabled) > 0); } static inline int jump_label_apply_nops(struct module *mod) @@ -154,13 +208,20 @@ static inline int jump_label_apply_nops(struct module *mod) return 0; } -static inline void jump_label_rate_limit(struct jump_label_key_deferred *key, +static inline void +jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { } + +#define STATIC_KEY_INIT_TRUE ((struct static_key) \ + { .enabled = ATOMIC_INIT(1) }) +#define STATIC_KEY_INIT_FALSE ((struct static_key) \ + { .enabled = ATOMIC_INIT(0) }) + #endif /* HAVE_JUMP_LABEL */ -#define jump_label_key_enabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(1), }) -#define jump_label_key_disabled ((struct jump_label_key){ .enabled = ATOMIC_INIT(0), }) +#define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE +#define jump_label_enabled static_key_enabled #endif /* _LINUX_JUMP_LABEL_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0eac07c95255..7dfaae7846ab 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -214,8 +214,8 @@ enum { #include #ifdef CONFIG_RPS -#include -extern struct jump_label_key rps_needed; +#include +extern struct static_key rps_needed; #endif struct neighbour; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index b809265607d0..29734be334c1 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -163,13 +163,13 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[]; extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #if defined(CONFIG_JUMP_LABEL) -#include -extern struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +#include +extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) { if (__builtin_constant_p(pf) && __builtin_constant_p(hook)) - return static_branch(&nf_hooks_needed[pf][hook]); + return static_key_false(&nf_hooks_needed[pf][hook]); return !list_empty(&nf_hooks[pf][hook]); } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 412b790f5da6..0d21e6f1cf53 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -514,7 +514,7 @@ struct perf_guest_info_callbacks { #include #include #include -#include +#include #include #include @@ -1038,7 +1038,7 @@ static inline int is_software_event(struct perf_event *event) return event->pmu->task_ctx_nr == perf_sw_context; } -extern struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; +extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; extern void __perf_sw_event(u32, u64, struct pt_regs *, u64); @@ -1066,7 +1066,7 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) { struct pt_regs hot_regs; - if (static_branch(&perf_swevent_enabled[event_id])) { + if (static_key_false(&perf_swevent_enabled[event_id])) { if (!regs) { perf_fetch_caller_regs(&hot_regs); regs = &hot_regs; @@ -1075,12 +1075,12 @@ perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) } } -extern struct jump_label_key_deferred perf_sched_events; +extern struct static_key_deferred perf_sched_events; static inline void perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task) { - if (static_branch(&perf_sched_events.key)) + if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_in(prev, task); } @@ -1089,7 +1089,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev, { perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, NULL, 0); - if (static_branch(&perf_sched_events.key)) + if (static_key_false(&perf_sched_events.key)) __perf_event_task_sched_out(prev, next); } diff --git a/include/linux/static_key.h b/include/linux/static_key.h new file mode 100644 index 000000000000..27bd3f8a0857 --- /dev/null +++ b/include/linux/static_key.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index fc36da97ff7e..bd96ecd0e05c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include struct module; struct tracepoint; @@ -29,7 +29,7 @@ struct tracepoint_func { struct tracepoint { const char *name; /* Tracepoint name */ - struct jump_label_key key; + struct static_key key; void (*regfunc)(void); void (*unregfunc)(void); struct tracepoint_func __rcu *funcs; @@ -145,7 +145,7 @@ static inline void tracepoint_synchronize_unregister(void) extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - if (static_branch(&__tracepoint_##name.key)) \ + if (static_key_false(&__tracepoint_##name.key)) \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ @@ -188,7 +188,7 @@ static inline void tracepoint_synchronize_unregister(void) __attribute__((section("__tracepoints_strings"))) = #name; \ struct tracepoint __tracepoint_##name \ __attribute__((section("__tracepoints"))) = \ - { __tpstrtab_##name, JUMP_LABEL_INIT, reg, unreg, NULL };\ + { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\ static struct tracepoint * const __tracepoint_ptr_##name __used \ __attribute__((section("__tracepoints_ptrs"))) = \ &__tracepoint_##name; diff --git a/include/net/sock.h b/include/net/sock.h index 91c1c8baf020..dcde2d9268cd 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -55,7 +55,7 @@ #include #include #include -#include +#include #include #include @@ -924,13 +924,13 @@ inline void sk_refcnt_debug_release(const struct sock *sk) #endif /* SOCK_REFCNT_DEBUG */ #if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) -extern struct jump_label_key memcg_socket_limit_enabled; +extern struct static_key memcg_socket_limit_enabled; static inline struct cg_proto *parent_cg_proto(struct proto *proto, struct cg_proto *cg_proto) { return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg)); } -#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled) +#define mem_cgroup_sockets_enabled static_key_false(&memcg_socket_limit_enabled) #else #define mem_cgroup_sockets_enabled 0 static inline struct cg_proto *parent_cg_proto(struct proto *proto, diff --git a/kernel/events/core.c b/kernel/events/core.c index 7c3b9de55f6b..5e0f8bb89b2b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -128,7 +128,7 @@ enum event_type_t { * perf_sched_events : >0 events exist * perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu */ -struct jump_label_key_deferred perf_sched_events __read_mostly; +struct static_key_deferred perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); static atomic_t nr_mmap_events __read_mostly; @@ -2769,7 +2769,7 @@ static void free_event(struct perf_event *event) if (!event->parent) { if (event->attach_state & PERF_ATTACH_TASK) - jump_label_dec_deferred(&perf_sched_events); + static_key_slow_dec_deferred(&perf_sched_events); if (event->attr.mmap || event->attr.mmap_data) atomic_dec(&nr_mmap_events); if (event->attr.comm) @@ -2780,7 +2780,7 @@ static void free_event(struct perf_event *event) put_callchain_buffers(); if (is_cgroup_event(event)) { atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); - jump_label_dec_deferred(&perf_sched_events); + static_key_slow_dec_deferred(&perf_sched_events); } } @@ -4982,7 +4982,7 @@ fail: return err; } -struct jump_label_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; +struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_event_destroy(struct perf_event *event) { @@ -4990,7 +4990,7 @@ static void sw_perf_event_destroy(struct perf_event *event) WARN_ON(event->parent); - jump_label_dec(&perf_swevent_enabled[event_id]); + static_key_slow_dec(&perf_swevent_enabled[event_id]); swevent_hlist_put(event); } @@ -5020,7 +5020,7 @@ static int perf_swevent_init(struct perf_event *event) if (err) return err; - jump_label_inc(&perf_swevent_enabled[event_id]); + static_key_slow_inc(&perf_swevent_enabled[event_id]); event->destroy = sw_perf_event_destroy; } @@ -5843,7 +5843,7 @@ done: if (!event->parent) { if (event->attach_state & PERF_ATTACH_TASK) - jump_label_inc(&perf_sched_events.key); + static_key_slow_inc(&perf_sched_events.key); if (event->attr.mmap || event->attr.mmap_data) atomic_inc(&nr_mmap_events); if (event->attr.comm) @@ -6081,7 +6081,7 @@ SYSCALL_DEFINE5(perf_event_open, * - that may need work on context switch */ atomic_inc(&per_cpu(perf_cgroup_events, event->cpu)); - jump_label_inc(&perf_sched_events.key); + static_key_slow_inc(&perf_sched_events.key); } /* diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 543782e7cdd2..bf9dcadbb53a 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #ifdef HAVE_JUMP_LABEL @@ -29,10 +29,11 @@ void jump_label_unlock(void) mutex_unlock(&jump_label_mutex); } -bool jump_label_enabled(struct jump_label_key *key) +bool static_key_enabled(struct static_key *key) { - return !!atomic_read(&key->enabled); + return (atomic_read(&key->enabled) > 0); } +EXPORT_SYMBOL_GPL(static_key_enabled); static int jump_label_cmp(const void *a, const void *b) { @@ -58,22 +59,26 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); } -static void jump_label_update(struct jump_label_key *key, int enable); +static void jump_label_update(struct static_key *key, int enable); -void jump_label_inc(struct jump_label_key *key) +void static_key_slow_inc(struct static_key *key) { if (atomic_inc_not_zero(&key->enabled)) return; jump_label_lock(); - if (atomic_read(&key->enabled) == 0) - jump_label_update(key, JUMP_LABEL_ENABLE); + if (atomic_read(&key->enabled) == 0) { + if (!jump_label_get_branch_default(key)) + jump_label_update(key, JUMP_LABEL_ENABLE); + else + jump_label_update(key, JUMP_LABEL_DISABLE); + } atomic_inc(&key->enabled); jump_label_unlock(); } -EXPORT_SYMBOL_GPL(jump_label_inc); +EXPORT_SYMBOL_GPL(static_key_slow_inc); -static void __jump_label_dec(struct jump_label_key *key, +static void __static_key_slow_dec(struct static_key *key, unsigned long rate_limit, struct delayed_work *work) { if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { @@ -85,32 +90,35 @@ static void __jump_label_dec(struct jump_label_key *key, if (rate_limit) { atomic_inc(&key->enabled); schedule_delayed_work(work, rate_limit); - } else - jump_label_update(key, JUMP_LABEL_DISABLE); - + } else { + if (!jump_label_get_branch_default(key)) + jump_label_update(key, JUMP_LABEL_DISABLE); + else + jump_label_update(key, JUMP_LABEL_ENABLE); + } jump_label_unlock(); } -EXPORT_SYMBOL_GPL(jump_label_dec); static void jump_label_update_timeout(struct work_struct *work) { - struct jump_label_key_deferred *key = - container_of(work, struct jump_label_key_deferred, work.work); - __jump_label_dec(&key->key, 0, NULL); + struct static_key_deferred *key = + container_of(work, struct static_key_deferred, work.work); + __static_key_slow_dec(&key->key, 0, NULL); } -void jump_label_dec(struct jump_label_key *key) +void static_key_slow_dec(struct static_key *key) { - __jump_label_dec(key, 0, NULL); + __static_key_slow_dec(key, 0, NULL); } +EXPORT_SYMBOL_GPL(static_key_slow_dec); -void jump_label_dec_deferred(struct jump_label_key_deferred *key) +void static_key_slow_dec_deferred(struct static_key_deferred *key) { - __jump_label_dec(&key->key, key->timeout, &key->work); + __static_key_slow_dec(&key->key, key->timeout, &key->work); } +EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); - -void jump_label_rate_limit(struct jump_label_key_deferred *key, +void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { key->timeout = rl; @@ -153,7 +161,7 @@ void __weak __init_or_module arch_jump_label_transform_static(struct jump_entry arch_jump_label_transform(entry, type); } -static void __jump_label_update(struct jump_label_key *key, +static void __jump_label_update(struct static_key *key, struct jump_entry *entry, struct jump_entry *stop, int enable) { @@ -170,27 +178,40 @@ static void __jump_label_update(struct jump_label_key *key, } } +static enum jump_label_type jump_label_type(struct static_key *key) +{ + bool true_branch = jump_label_get_branch_default(key); + bool state = static_key_enabled(key); + + if ((!true_branch && state) || (true_branch && !state)) + return JUMP_LABEL_ENABLE; + + return JUMP_LABEL_DISABLE; +} + void __init jump_label_init(void) { struct jump_entry *iter_start = __start___jump_table; struct jump_entry *iter_stop = __stop___jump_table; - struct jump_label_key *key = NULL; + struct static_key *key = NULL; struct jump_entry *iter; jump_label_lock(); jump_label_sort_entries(iter_start, iter_stop); for (iter = iter_start; iter < iter_stop; iter++) { - struct jump_label_key *iterk; + struct static_key *iterk; - iterk = (struct jump_label_key *)(unsigned long)iter->key; - arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? - JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); + iterk = (struct static_key *)(unsigned long)iter->key; + arch_jump_label_transform_static(iter, jump_label_type(iterk)); if (iterk == key) continue; key = iterk; - key->entries = iter; + /* + * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. + */ + *((unsigned long *)&key->entries) += (unsigned long)iter; #ifdef CONFIG_MODULES key->next = NULL; #endif @@ -200,8 +221,8 @@ void __init jump_label_init(void) #ifdef CONFIG_MODULES -struct jump_label_mod { - struct jump_label_mod *next; +struct static_key_mod { + struct static_key_mod *next; struct jump_entry *entries; struct module *mod; }; @@ -221,9 +242,9 @@ static int __jump_label_mod_text_reserved(void *start, void *end) start, end); } -static void __jump_label_mod_update(struct jump_label_key *key, int enable) +static void __jump_label_mod_update(struct static_key *key, int enable) { - struct jump_label_mod *mod = key->next; + struct static_key_mod *mod = key->next; while (mod) { struct module *m = mod->mod; @@ -254,11 +275,7 @@ void jump_label_apply_nops(struct module *mod) return; for (iter = iter_start; iter < iter_stop; iter++) { - struct jump_label_key *iterk; - - iterk = (struct jump_label_key *)(unsigned long)iter->key; - arch_jump_label_transform_static(iter, jump_label_enabled(iterk) ? - JUMP_LABEL_ENABLE : JUMP_LABEL_DISABLE); + arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); } } @@ -267,8 +284,8 @@ static int jump_label_add_module(struct module *mod) struct jump_entry *iter_start = mod->jump_entries; struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; struct jump_entry *iter; - struct jump_label_key *key = NULL; - struct jump_label_mod *jlm; + struct static_key *key = NULL; + struct static_key_mod *jlm; /* if the module doesn't have jump label entries, just return */ if (iter_start == iter_stop) @@ -277,28 +294,30 @@ static int jump_label_add_module(struct module *mod) jump_label_sort_entries(iter_start, iter_stop); for (iter = iter_start; iter < iter_stop; iter++) { - if (iter->key == (jump_label_t)(unsigned long)key) - continue; + struct static_key *iterk; - key = (struct jump_label_key *)(unsigned long)iter->key; + iterk = (struct static_key *)(unsigned long)iter->key; + if (iterk == key) + continue; + key = iterk; if (__module_address(iter->key) == mod) { - atomic_set(&key->enabled, 0); - key->entries = iter; + /* + * Set key->entries to iter, but preserve JUMP_LABEL_TRUE_BRANCH. + */ + *((unsigned long *)&key->entries) += (unsigned long)iter; key->next = NULL; continue; } - - jlm = kzalloc(sizeof(struct jump_label_mod), GFP_KERNEL); + jlm = kzalloc(sizeof(struct static_key_mod), GFP_KERNEL); if (!jlm) return -ENOMEM; - jlm->mod = mod; jlm->entries = iter; jlm->next = key->next; key->next = jlm; - if (jump_label_enabled(key)) + if (jump_label_type(key) == JUMP_LABEL_ENABLE) __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); } @@ -310,14 +329,14 @@ static void jump_label_del_module(struct module *mod) struct jump_entry *iter_start = mod->jump_entries; struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; struct jump_entry *iter; - struct jump_label_key *key = NULL; - struct jump_label_mod *jlm, **prev; + struct static_key *key = NULL; + struct static_key_mod *jlm, **prev; for (iter = iter_start; iter < iter_stop; iter++) { if (iter->key == (jump_label_t)(unsigned long)key) continue; - key = (struct jump_label_key *)(unsigned long)iter->key; + key = (struct static_key *)(unsigned long)iter->key; if (__module_address(iter->key) == mod) continue; @@ -419,9 +438,10 @@ int jump_label_text_reserved(void *start, void *end) return ret; } -static void jump_label_update(struct jump_label_key *key, int enable) +static void jump_label_update(struct static_key *key, int enable) { - struct jump_entry *entry = key->entries, *stop = __stop___jump_table; + struct jump_entry *stop = __stop___jump_table; + struct jump_entry *entry = jump_label_get_entries(key); #ifdef CONFIG_MODULES struct module *mod = __module_address((unsigned long)key); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5255c9d2e053..112c6824476b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -162,13 +162,13 @@ static int sched_feat_show(struct seq_file *m, void *v) #ifdef HAVE_JUMP_LABEL -#define jump_label_key__true jump_label_key_enabled -#define jump_label_key__false jump_label_key_disabled +#define jump_label_key__true STATIC_KEY_INIT_TRUE +#define jump_label_key__false STATIC_KEY_INIT_FALSE #define SCHED_FEAT(name, enabled) \ jump_label_key__##enabled , -struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { +struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { #include "features.h" }; @@ -176,14 +176,14 @@ struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR] = { static void sched_feat_disable(int i) { - if (jump_label_enabled(&sched_feat_keys[i])) - jump_label_dec(&sched_feat_keys[i]); + if (static_key_enabled(&sched_feat_keys[i])) + static_key_slow_dec(&sched_feat_keys[i]); } static void sched_feat_enable(int i) { - if (!jump_label_enabled(&sched_feat_keys[i])) - jump_label_inc(&sched_feat_keys[i]); + if (!static_key_enabled(&sched_feat_keys[i])) + static_key_slow_inc(&sched_feat_keys[i]); } #else static void sched_feat_disable(int i) { }; @@ -894,7 +894,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) delta -= irq_delta; #endif #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING - if (static_branch((¶virt_steal_rq_enabled))) { + if (static_key_false((¶virt_steal_rq_enabled))) { u64 st; steal = paravirt_steal_clock(cpu_of(rq)); @@ -2756,7 +2756,7 @@ void account_idle_time(cputime_t cputime) static __always_inline bool steal_account_process_tick(void) { #ifdef CONFIG_PARAVIRT - if (static_branch(¶virt_steal_enabled)) { + if (static_key_false(¶virt_steal_enabled)) { u64 steal, st = 0; steal = paravirt_steal_clock(smp_processor_id()); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7c6414fc669d..423547ada38a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1399,20 +1399,20 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) #ifdef CONFIG_CFS_BANDWIDTH #ifdef HAVE_JUMP_LABEL -static struct jump_label_key __cfs_bandwidth_used; +static struct static_key __cfs_bandwidth_used; static inline bool cfs_bandwidth_used(void) { - return static_branch(&__cfs_bandwidth_used); + return static_key_false(&__cfs_bandwidth_used); } void account_cfs_bandwidth_used(int enabled, int was_enabled) { /* only need to count groups transitioning between enabled/!enabled */ if (enabled && !was_enabled) - jump_label_inc(&__cfs_bandwidth_used); + static_key_slow_inc(&__cfs_bandwidth_used); else if (!enabled && was_enabled) - jump_label_dec(&__cfs_bandwidth_used); + static_key_slow_dec(&__cfs_bandwidth_used); } #else /* HAVE_JUMP_LABEL */ static bool cfs_bandwidth_used(void) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 98c0c2623db8..b4cd6d8ea150 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -611,7 +611,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) * Tunables that become constants when CONFIG_SCHED_DEBUG is off: */ #ifdef CONFIG_SCHED_DEBUG -# include +# include # define const_debug __read_mostly #else # define const_debug const @@ -630,18 +630,18 @@ enum { #undef SCHED_FEAT #if defined(CONFIG_SCHED_DEBUG) && defined(HAVE_JUMP_LABEL) -static __always_inline bool static_branch__true(struct jump_label_key *key) +static __always_inline bool static_branch__true(struct static_key *key) { - return likely(static_branch(key)); /* Not out of line branch. */ + return static_key_true(key); /* Not out of line branch. */ } -static __always_inline bool static_branch__false(struct jump_label_key *key) +static __always_inline bool static_branch__false(struct static_key *key) { - return unlikely(static_branch(key)); /* Out of line branch. */ + return static_key_false(key); /* Out of line branch. */ } #define SCHED_FEAT(name, enabled) \ -static __always_inline bool static_branch_##name(struct jump_label_key *key) \ +static __always_inline bool static_branch_##name(struct static_key *key) \ { \ return static_branch__##enabled(key); \ } @@ -650,7 +650,7 @@ static __always_inline bool static_branch_##name(struct jump_label_key *key) \ #undef SCHED_FEAT -extern struct jump_label_key sched_feat_keys[__SCHED_FEAT_NR]; +extern struct static_key sched_feat_keys[__SCHED_FEAT_NR]; #define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x])) #else /* !(SCHED_DEBUG && HAVE_JUMP_LABEL) */ #define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x)) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index f1539decd99d..d96ba22dabfa 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include extern struct tracepoint * const __start___tracepoints_ptrs[]; extern struct tracepoint * const __stop___tracepoints_ptrs[]; @@ -256,9 +256,9 @@ static void set_tracepoint(struct tracepoint_entry **entry, { WARN_ON(strcmp((*entry)->name, elem->name) != 0); - if (elem->regfunc && !jump_label_enabled(&elem->key) && active) + if (elem->regfunc && !static_key_enabled(&elem->key) && active) elem->regfunc(); - else if (elem->unregfunc && jump_label_enabled(&elem->key) && !active) + else if (elem->unregfunc && static_key_enabled(&elem->key) && !active) elem->unregfunc(); /* @@ -269,10 +269,10 @@ static void set_tracepoint(struct tracepoint_entry **entry, * is used. */ rcu_assign_pointer(elem->funcs, (*entry)->funcs); - if (active && !jump_label_enabled(&elem->key)) - jump_label_inc(&elem->key); - else if (!active && jump_label_enabled(&elem->key)) - jump_label_dec(&elem->key); + if (active && !static_key_enabled(&elem->key)) + static_key_slow_inc(&elem->key); + else if (!active && static_key_enabled(&elem->key)) + static_key_slow_dec(&elem->key); } /* @@ -283,11 +283,11 @@ static void set_tracepoint(struct tracepoint_entry **entry, */ static void disable_tracepoint(struct tracepoint *elem) { - if (elem->unregfunc && jump_label_enabled(&elem->key)) + if (elem->unregfunc && static_key_enabled(&elem->key)) elem->unregfunc(); - if (jump_label_enabled(&elem->key)) - jump_label_dec(&elem->key); + if (static_key_enabled(&elem->key)) + static_key_slow_dec(&elem->key); rcu_assign_pointer(elem->funcs, NULL); } diff --git a/net/core/dev.c b/net/core/dev.c index 115dee1d985d..da7ce7f0e566 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -134,7 +134,7 @@ #include #include #include -#include +#include #include #include "net-sysfs.h" @@ -1441,11 +1441,11 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); -static struct jump_label_key netstamp_needed __read_mostly; +static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL -/* We are not allowed to call jump_label_dec() from irq context +/* We are not allowed to call static_key_slow_dec() from irq context * If net_disable_timestamp() is called from irq context, defer the - * jump_label_dec() calls. + * static_key_slow_dec() calls. */ static atomic_t netstamp_needed_deferred; #endif @@ -1457,12 +1457,12 @@ void net_enable_timestamp(void) if (deferred) { while (--deferred) - jump_label_dec(&netstamp_needed); + static_key_slow_dec(&netstamp_needed); return; } #endif WARN_ON(in_interrupt()); - jump_label_inc(&netstamp_needed); + static_key_slow_inc(&netstamp_needed); } EXPORT_SYMBOL(net_enable_timestamp); @@ -1474,19 +1474,19 @@ void net_disable_timestamp(void) return; } #endif - jump_label_dec(&netstamp_needed); + static_key_slow_dec(&netstamp_needed); } EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { skb->tstamp.tv64 = 0; - if (static_branch(&netstamp_needed)) + if (static_key_false(&netstamp_needed)) __net_timestamp(skb); } #define net_timestamp_check(COND, SKB) \ - if (static_branch(&netstamp_needed)) { \ + if (static_key_false(&netstamp_needed)) { \ if ((COND) && !(SKB)->tstamp.tv64) \ __net_timestamp(SKB); \ } \ @@ -2660,7 +2660,7 @@ EXPORT_SYMBOL(__skb_get_rxhash); struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); -struct jump_label_key rps_needed __read_mostly; +struct static_key rps_needed __read_mostly; static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, @@ -2945,7 +2945,7 @@ int netif_rx(struct sk_buff *skb) trace_netif_rx(skb); #ifdef CONFIG_RPS - if (static_branch(&rps_needed)) { + if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; @@ -3309,7 +3309,7 @@ int netif_receive_skb(struct sk_buff *skb) return NET_RX_SUCCESS; #ifdef CONFIG_RPS - if (static_branch(&rps_needed)) { + if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu, ret; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index a1727cda03d7..495586232aa1 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -608,10 +608,10 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, spin_unlock(&rps_map_lock); if (map) - jump_label_inc(&rps_needed); + static_key_slow_inc(&rps_needed); if (old_map) { kfree_rcu(old_map, rcu); - jump_label_dec(&rps_needed); + static_key_slow_dec(&rps_needed); } free_cpumask_var(mask); return len; diff --git a/net/core/sock.c b/net/core/sock.c index 3e81fd2e3c75..3a4e5817a2a7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -111,7 +111,7 @@ #include #include #include -#include +#include #include #include @@ -184,7 +184,7 @@ void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; -struct jump_label_key memcg_socket_limit_enabled; +struct static_key memcg_socket_limit_enabled; EXPORT_SYMBOL(memcg_socket_limit_enabled); /* diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d05559d4d9cd..0c2850874254 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -69,9 +69,9 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, if (sock_table != orig_sock_table) { rcu_assign_pointer(rps_sock_flow_table, sock_table); if (sock_table) - jump_label_inc(&rps_needed); + static_key_slow_inc(&rps_needed); if (orig_sock_table) { - jump_label_dec(&rps_needed); + static_key_slow_dec(&rps_needed); synchronize_rcu(); vfree(orig_sock_table); } diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 49978788a9dc..602fb305365f 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -111,7 +111,7 @@ void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); if (val != RESOURCE_MAX) - jump_label_dec(&memcg_socket_limit_enabled); + static_key_slow_dec(&memcg_socket_limit_enabled); } EXPORT_SYMBOL(tcp_destroy_cgroup); @@ -143,9 +143,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) net->ipv4.sysctl_tcp_mem[i]); if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) - jump_label_dec(&memcg_socket_limit_enabled); + static_key_slow_dec(&memcg_socket_limit_enabled); else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) - jump_label_inc(&memcg_socket_limit_enabled); + static_key_slow_inc(&memcg_socket_limit_enabled); return 0; } diff --git a/net/netfilter/core.c b/net/netfilter/core.c index b4e8ff05b301..e1b7e051332e 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -56,7 +56,7 @@ struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); #if defined(CONFIG_JUMP_LABEL) -struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); #endif @@ -77,7 +77,7 @@ int nf_register_hook(struct nf_hook_ops *reg) list_add_rcu(®->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); #if defined(CONFIG_JUMP_LABEL) - jump_label_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); + static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif return 0; } @@ -89,7 +89,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg) list_del_rcu(®->list); mutex_unlock(&nf_hook_mutex); #if defined(CONFIG_JUMP_LABEL) - jump_label_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); + static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); } -- cgit From 3bdc0eba0b8b47797f4a76e377dd8360f317450f Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Sat, 11 Feb 2012 15:39:30 +0000 Subject: net: Add framework to allow sending packets with customized CRC. This is useful for testing RX handling of frames with bad CRCs. Requires driver support to actually put the packet on the wire properly. Signed-off-by: Ben Greear Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- arch/alpha/include/asm/socket.h | 3 +++ arch/arm/include/asm/socket.h | 3 +++ arch/avr32/include/asm/socket.h | 3 +++ arch/cris/include/asm/socket.h | 3 +++ arch/frv/include/asm/socket.h | 3 +++ arch/h8300/include/asm/socket.h | 3 +++ arch/ia64/include/asm/socket.h | 3 +++ arch/m32r/include/asm/socket.h | 3 +++ arch/m68k/include/asm/socket.h | 3 +++ arch/mips/include/asm/socket.h | 3 +++ arch/mn10300/include/asm/socket.h | 3 +++ arch/parisc/include/asm/socket.h | 4 ++++ arch/powerpc/include/asm/socket.h | 3 +++ arch/s390/include/asm/socket.h | 3 +++ arch/sparc/include/asm/socket.h | 4 ++++ arch/xtensa/include/asm/socket.h | 3 +++ include/asm-generic/socket.h | 4 ++++ include/linux/if.h | 2 ++ include/linux/netdevice.h | 8 +++++++- include/linux/skbuff.h | 4 +++- include/net/sock.h | 4 ++++ net/core/skbuff.c | 1 + net/core/sock.c | 5 +++++ net/packet/af_packet.c | 32 ++++++++++++++++++++++++++++---- 24 files changed, 104 insertions(+), 6 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 16449d330dae..dcb221a4b5be 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -73,6 +73,9 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index d958c74e5260..6433cadb6ed4 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index 30078f98b3ab..a473f8c6a9aa 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index 048aba64600c..ae52825021af 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -68,6 +68,9 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h index 7a361810f3cc..a5b1d7dbb205 100644 --- a/arch/frv/include/asm/socket.h +++ b/arch/frv/include/asm/socket.h @@ -66,5 +66,8 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index e7bbfcee5b99..ec4554e7b04b 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index ced62de9d5a9..41fc28a4a18a 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -75,4 +75,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h index 696cb4c7ca4e..a15f40b52783 100644 --- a/arch/m32r/include/asm/socket.h +++ b/arch/m32r/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h index e8b41a6775f9..d1be684edf97 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index 52104872e9e3..a2ed6fdad4e0 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -86,6 +86,9 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #ifdef __KERNEL__ /** sock_type - Socket types diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h index 013fcc51698f..820463a484b8 100644 --- a/arch/mn10300/include/asm/socket.h +++ b/arch/mn10300/include/asm/socket.h @@ -66,4 +66,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index f717c9bec16f..1b52c2c31a7a 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -65,6 +65,10 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 0x4023 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 0x4024 + + /* O_NONBLOCK clashes with the bits used for socket types. Therefore we * have to define SOCK_NONBLOCK to a different value here. */ diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index fe1c0b478fd7..3d5179bb122f 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -73,4 +73,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index 581702fa1b0c..c91b720965c0 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -74,4 +74,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index 68e2e2746f6f..bea1568ae4af 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -62,6 +62,10 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 0x0026 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 0x0027 + + /* Security levels - as per NRL IPv6 - don't actually do anything */ #define SO_SECURITY_AUTHENTICATION 0x5001 #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index 74818b161362..e36c68184920 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -77,4 +77,7 @@ #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h index d9aaac0c36d4..b1bea03274d5 100644 --- a/include/asm-generic/socket.h +++ b/include/asm-generic/socket.h @@ -68,4 +68,8 @@ #define SO_WIFI_STATUS 41 #define SCM_WIFI_STATUS SO_WIFI_STATUS #define SO_PEEK_OFF 42 + +/* Instruct lower device to use last 4-bytes of skb data as FCS */ +#define SO_NOFCS 43 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/include/linux/if.h b/include/linux/if.h index 06b6ef60c821..f995c663c493 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -80,6 +80,8 @@ * skbs on transmit */ #define IFF_UNICAST_FLT 0x20000 /* Supports unicast filtering */ #define IFF_TEAM_PORT 0x40000 /* device used as team port */ +#define IFF_SUPP_NOFCS 0x80000 /* device supports sending custom FCS */ + #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0eac07c95255..f1b7d037c2c5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1082,7 +1082,8 @@ struct net_device { const struct header_ops *header_ops; unsigned int flags; /* interface flags (a la BSD) */ - unsigned int priv_flags; /* Like 'flags' but invisible to userspace. */ + unsigned int priv_flags; /* Like 'flags' but invisible to userspace. + * See if.h for definitions. */ unsigned short gflags; unsigned short padded; /* How much padding added by alloc_netdev() */ @@ -2650,6 +2651,11 @@ static inline int netif_is_bond_slave(struct net_device *dev) return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; } +static inline bool netif_supports_nofcs(struct net_device *dev) +{ + return dev->priv_flags & IFF_SUPP_NOFCS; +} + extern struct pernet_operations __net_initdata loopback_net_ops; /* Logging, debugging and troubleshooting/diagnostic helpers. */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c11a44ea1bf4..06a4c0fd7bef 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -361,6 +361,7 @@ typedef unsigned char *sk_buff_data_t; * ports. * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not + * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking @@ -459,7 +460,8 @@ struct sk_buff { __u8 l4_rxhash:1; __u8 wifi_acked_valid:1; __u8 wifi_acked:1; - /* 10/12 bit hole (depending on ndisc_nodetype presence) */ + __u8 no_fcs:1; + /* 9/11 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #ifdef CONFIG_NET_DMA diff --git a/include/net/sock.h b/include/net/sock.h index 9c0553b9e451..ba761e7de252 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -615,6 +615,10 @@ enum sock_flags { SOCK_RXQ_OVFL, SOCK_ZEROCOPY, /* buffers from userspace */ SOCK_WIFI_STATUS, /* push wifi status to userspace */ + SOCK_NOFCS, /* Tell NIC not to do the Ethernet FCS. + * Will use last 4 bytes of packet sent from + * user-space instead. + */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f3a530780753..6eb656acdfe5 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -592,6 +592,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; new->l4_rxhash = old->l4_rxhash; + new->no_fcs = old->no_fcs; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif diff --git a/net/core/sock.c b/net/core/sock.c index 19942d4bb6e6..55011cb691ad 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -799,6 +799,11 @@ set_rcvbuf: else ret = -EOPNOTSUPP; break; + + case SO_NOFCS: + sock_valbool_flag(sk, SOCK_NOFCS, valbool); + break; + default: ret = -ENOPROTOOPT; break; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2dbb32b988c4..ae2d484416dd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1459,6 +1459,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, struct net_device *dev; __be16 proto = 0; int err; + int extra_len = 0; /* * Get and verify the address. @@ -1493,8 +1494,16 @@ retry: * raw protocol and you must do your own fragmentation at this level. */ + if (unlikely(sock_flag(sk, SOCK_NOFCS))) { + if (!netif_supports_nofcs(dev)) { + err = -EPROTONOSUPPORT; + goto out_unlock; + } + extra_len = 4; /* We're doing our own CRC */ + } + err = -EMSGSIZE; - if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN) + if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len) goto out_unlock; if (!skb) { @@ -1526,7 +1535,7 @@ retry: goto retry; } - if (len > (dev->mtu + dev->hard_header_len)) { + if (len > (dev->mtu + dev->hard_header_len + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual * packet in hand. @@ -1548,6 +1557,9 @@ retry: if (err < 0) goto out_unlock; + if (unlikely(extra_len == 4)) + skb->no_fcs = 1; + dev_queue_xmit(skb); rcu_read_unlock(); return len; @@ -2209,6 +2221,7 @@ static int packet_snd(struct socket *sock, struct packet_sock *po = pkt_sk(sk); unsigned short gso_type = 0; int hlen, tlen; + int extra_len = 0; /* * Get and verify the address. @@ -2288,8 +2301,16 @@ static int packet_snd(struct socket *sock, } } + if (unlikely(sock_flag(sk, SOCK_NOFCS))) { + if (!netif_supports_nofcs(dev)) { + err = -EPROTONOSUPPORT; + goto out_unlock; + } + extra_len = 4; /* We're doing our own CRC */ + } + err = -EMSGSIZE; - if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN)) + if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) goto out_unlock; err = -ENOBUFS; @@ -2315,7 +2336,7 @@ static int packet_snd(struct socket *sock, if (err < 0) goto out_free; - if (!gso_type && (len > dev->mtu + reserve)) { + if (!gso_type && (len > dev->mtu + reserve + extra_len)) { /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual * packet in hand. @@ -2353,6 +2374,9 @@ static int packet_snd(struct socket *sock, len += vnet_hdr_len; } + if (unlikely(extra_len == 4)) + skb->no_fcs = 1; + /* * Now send it */ -- cgit From 77a1abf54f4b003ad6e59c535045b2ad89fedfeb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Mar 2012 04:50:09 +0000 Subject: net: export netdev_stats_to_stats64 Some drivers use internal netdev stats member to store part of their stats, yet advertize ndo_get_stats64() to implement some 64bit fields. Allow them to use netdev_stats_to_stats64() helper to make the copy of netdev stats before they compute their 64bit counters. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f1b7d037c2c5..4d279c5287f8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2557,6 +2557,8 @@ extern void dev_load(struct net *net, const char *name); extern void dev_mcast_init(void); extern struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, struct rtnl_link_stats64 *storage); +extern void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, + const struct net_device_stats *netdev_stats); extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; diff --git a/net/core/dev.c b/net/core/dev.c index 763a0eda7158..5ef3b65c3687 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5834,12 +5834,12 @@ void netdev_run_todo(void) /* Convert net_device_stats to rtnl_link_stats64. They have the same * fields in the same order, with only the type differing. */ -static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, - const struct net_device_stats *netdev_stats) +void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, + const struct net_device_stats *netdev_stats) { #if BITS_PER_LONG == 64 - BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); - memcpy(stats64, netdev_stats, sizeof(*stats64)); + BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); + memcpy(stats64, netdev_stats, sizeof(*stats64)); #else size_t i, n = sizeof(*stats64) / sizeof(u64); const unsigned long *src = (const unsigned long *)netdev_stats; @@ -5851,6 +5851,7 @@ static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, dst[i] = src[i]; #endif } +EXPORT_SYMBOL(netdev_stats_to_stats64); /** * dev_get_stats - get network device statistics -- cgit From 95f050bf7f64be5168ae2e2c715bb0b0ded141d1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 6 Mar 2012 16:12:15 -0500 Subject: net: Use bool for return value of dev_valid_name(). Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4d279c5287f8..a89933bc4f2f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2122,7 +2122,7 @@ extern int netdev_rx_handler_register(struct net_device *dev, void *rx_handler_data); extern void netdev_rx_handler_unregister(struct net_device *dev); -extern int dev_valid_name(const char *name); +extern bool dev_valid_name(const char *name); extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); extern int dev_ethtool(struct net *net, struct ifreq *); extern unsigned dev_get_flags(const struct net_device *); diff --git a/net/core/dev.c b/net/core/dev.c index 5ef3b65c3687..0090809af7bd 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -848,21 +848,21 @@ EXPORT_SYMBOL(dev_get_by_flags_rcu); * to allow sysfs to work. We also disallow any kind of * whitespace. */ -int dev_valid_name(const char *name) +bool dev_valid_name(const char *name) { if (*name == '\0') - return 0; + return false; if (strlen(name) >= IFNAMSIZ) - return 0; + return false; if (!strcmp(name, ".") || !strcmp(name, "..")) - return 0; + return false; while (*name) { if (*name == '/' || isspace(*name)) - return 0; + return false; name++; } - return 1; + return true; } EXPORT_SYMBOL(dev_valid_name); -- cgit From 4d29515f5a2e062c3fe82bfb574da70ed544ffad Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 7 Mar 2012 21:02:35 -0500 Subject: net: Use bool in netdevice.h helpers. Specifically use it in napi_disable_pending(), napi_schedule_prep(), napi_reschedule(), netif_tx_queue_stopped(), netif_queue_stopped(), netif_xmit_stopped(), netif_xmit_frozen_or_stopped(), netif_running(), __netif_subqueue_stopped(), netif_subqueue_stopped(), netif_is_multiquue(), netif_carrier_ok(), netif_dormant(), netif_oper_up(), netif_device_present(), __netif_tx_trylock(), net_gso_ok(), skb_gso_ok(), netif_needs_gso(), and netif_is_bond_slave(). Signed-off-by: David S. Miller --- include/linux/netdevice.h | 52 +++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a89933bc4f2f..b195a34440bb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -417,7 +417,7 @@ typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); extern void __napi_schedule(struct napi_struct *n); -static inline int napi_disable_pending(struct napi_struct *n) +static inline bool napi_disable_pending(struct napi_struct *n) { return test_bit(NAPI_STATE_DISABLE, &n->state); } @@ -431,7 +431,7 @@ static inline int napi_disable_pending(struct napi_struct *n) * insure only one NAPI poll instance runs. We also make * sure there is no pending NAPI disable. */ -static inline int napi_schedule_prep(struct napi_struct *n) +static inline bool napi_schedule_prep(struct napi_struct *n) { return !napi_disable_pending(n) && !test_and_set_bit(NAPI_STATE_SCHED, &n->state); @@ -451,13 +451,13 @@ static inline void napi_schedule(struct napi_struct *n) } /* Try to reschedule poll. Called by dev->poll() after napi_complete(). */ -static inline int napi_reschedule(struct napi_struct *napi) +static inline bool napi_reschedule(struct napi_struct *napi) { if (napi_schedule_prep(napi)) { __napi_schedule(napi); - return 1; + return true; } - return 0; + return false; } /** @@ -1868,7 +1868,7 @@ static inline void netif_tx_stop_all_queues(struct net_device *dev) } } -static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue) +static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) { return test_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } @@ -1879,17 +1879,17 @@ static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue) * * Test if transmit queue on device is currently unable to send. */ -static inline int netif_queue_stopped(const struct net_device *dev) +static inline bool netif_queue_stopped(const struct net_device *dev) { return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); } -static inline int netif_xmit_stopped(const struct netdev_queue *dev_queue) +static inline bool netif_xmit_stopped(const struct netdev_queue *dev_queue) { return dev_queue->state & QUEUE_STATE_ANY_XOFF; } -static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) +static inline bool netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) { return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN; } @@ -1954,7 +1954,7 @@ static inline void netdev_reset_queue(struct net_device *dev_queue) * * Test if the device has been brought up. */ -static inline int netif_running(const struct net_device *dev) +static inline bool netif_running(const struct net_device *dev) { return test_bit(__LINK_STATE_START, &dev->state); } @@ -2004,16 +2004,16 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) * * Check individual transmit queue of a device with multiple transmit queues. */ -static inline int __netif_subqueue_stopped(const struct net_device *dev, - u16 queue_index) +static inline bool __netif_subqueue_stopped(const struct net_device *dev, + u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); return netif_tx_queue_stopped(txq); } -static inline int netif_subqueue_stopped(const struct net_device *dev, - struct sk_buff *skb) +static inline bool netif_subqueue_stopped(const struct net_device *dev, + struct sk_buff *skb) { return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb)); } @@ -2052,7 +2052,7 @@ static inline u16 skb_tx_hash(const struct net_device *dev, * * Check if device has multiple transmit queues */ -static inline int netif_is_multiqueue(const struct net_device *dev) +static inline bool netif_is_multiqueue(const struct net_device *dev) { return dev->num_tx_queues > 1; } @@ -2188,7 +2188,7 @@ extern void linkwatch_forget_dev(struct net_device *dev); * * Check if carrier is present on device */ -static inline int netif_carrier_ok(const struct net_device *dev) +static inline bool netif_carrier_ok(const struct net_device *dev) { return !test_bit(__LINK_STATE_NOCARRIER, &dev->state); } @@ -2240,7 +2240,7 @@ static inline void netif_dormant_off(struct net_device *dev) * * Check if carrier is present on device */ -static inline int netif_dormant(const struct net_device *dev) +static inline bool netif_dormant(const struct net_device *dev) { return test_bit(__LINK_STATE_DORMANT, &dev->state); } @@ -2252,7 +2252,7 @@ static inline int netif_dormant(const struct net_device *dev) * * Check if carrier is operational */ -static inline int netif_oper_up(const struct net_device *dev) +static inline bool netif_oper_up(const struct net_device *dev) { return (dev->operstate == IF_OPER_UP || dev->operstate == IF_OPER_UNKNOWN /* backward compat */); @@ -2264,7 +2264,7 @@ static inline int netif_oper_up(const struct net_device *dev) * * Check if device has not been removed from system. */ -static inline int netif_device_present(struct net_device *dev) +static inline bool netif_device_present(struct net_device *dev) { return test_bit(__LINK_STATE_PRESENT, &dev->state); } @@ -2334,9 +2334,9 @@ static inline void __netif_tx_lock_bh(struct netdev_queue *txq) txq->xmit_lock_owner = smp_processor_id(); } -static inline int __netif_tx_trylock(struct netdev_queue *txq) +static inline bool __netif_tx_trylock(struct netdev_queue *txq) { - int ok = spin_trylock(&txq->_xmit_lock); + bool ok = spin_trylock(&txq->_xmit_lock); if (likely(ok)) txq->xmit_lock_owner = smp_processor_id(); return ok; @@ -2614,7 +2614,7 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev, netdev_features_t netif_skb_features(struct sk_buff *skb); -static inline int net_gso_ok(netdev_features_t features, int gso_type) +static inline bool net_gso_ok(netdev_features_t features, int gso_type) { netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT; @@ -2629,14 +2629,14 @@ static inline int net_gso_ok(netdev_features_t features, int gso_type) return (features & feature) == feature; } -static inline int skb_gso_ok(struct sk_buff *skb, netdev_features_t features) +static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features) { return net_gso_ok(features, skb_shinfo(skb)->gso_type) && (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } -static inline int netif_needs_gso(struct sk_buff *skb, - netdev_features_t features) +static inline bool netif_needs_gso(struct sk_buff *skb, + netdev_features_t features) { return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); @@ -2648,7 +2648,7 @@ static inline void netif_set_gso_max_size(struct net_device *dev, dev->gso_max_size = size; } -static inline int netif_is_bond_slave(struct net_device *dev) +static inline bool netif_is_bond_slave(struct net_device *dev) { return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; } -- cgit From 5c4903549c05bbb373479e0ce2992573c120654a Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 7 Feb 2012 02:29:01 +0000 Subject: net: Fix issue with netdev_tx_reset_queue not resetting queue from XOFF state We are seeing dev_watchdog hangs on several drivers. I suspect this is due to the __QUEUE_STATE_STACK_XOFF bit being set prior to a reset for link change, and then not being cleared by netdev_tx_reset_queue. This change corrects that. In addition we were seeing dev_watchdog hangs on igb after running the ethtool tests. We found this to be due to the fact that the ethtool test runs the same logic as ndo_start_xmit, but we were never clearing the XOFF flag since the loopback test in ethtool does not do byte queue accounting. Signed-off-by: Alexander Duyck Tested-by: Stephen Ko Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 3 ++- include/linux/netdevice.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index fda824735e18..e96cef89f121 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2752,6 +2752,8 @@ void igb_configure_tx_ring(struct igb_adapter *adapter, txdctl |= E1000_TXDCTL_QUEUE_ENABLE; wr32(E1000_TXDCTL(reg_idx), txdctl); + + netdev_tx_reset_queue(txring_txq(ring)); } /** @@ -3244,7 +3246,6 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring) buffer_info = &tx_ring->tx_buffer_info[i]; igb_unmap_and_free_tx_resource(tx_ring, buffer_info); } - netdev_tx_reset_queue(txring_txq(tx_ring)); size = sizeof(struct igb_tx_buffer) * tx_ring->count; memset(tx_ring->tx_buffer_info, 0, size); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b195a34440bb..4bf314fe2145 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1939,6 +1939,7 @@ static inline void netdev_completed_queue(struct net_device *dev, static inline void netdev_tx_reset_queue(struct netdev_queue *q) { #ifdef CONFIG_BQL + clear_bit(__QUEUE_STATE_STACK_XOFF, &q->state); dql_reset(&q->dql); #endif } -- cgit From b37c0fbe3f6dfba1f8ad2aed47fb40578a254635 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 7 Feb 2012 02:29:06 +0000 Subject: net: Add memory barriers to prevent possible race in byte queue limits This change adds a memory barrier to the byte queue limit code to address a possible race as has been seen in the past with the netif_stop_queue/netif_wake_queue logic. Signed-off-by: Alexander Duyck Tested-by: Stephen Ko Signed-off-by: Jeff Kirsher --- include/linux/netdevice.h | 49 +++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4bf314fe2145..4535a4ea9760 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1899,12 +1899,22 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, { #ifdef CONFIG_BQL dql_queued(&dev_queue->dql, bytes); - if (unlikely(dql_avail(&dev_queue->dql) < 0)) { - set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); - if (unlikely(dql_avail(&dev_queue->dql) >= 0)) - clear_bit(__QUEUE_STATE_STACK_XOFF, - &dev_queue->state); - } + + if (likely(dql_avail(&dev_queue->dql) >= 0)) + return; + + set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); + + /* + * The XOFF flag must be set before checking the dql_avail below, + * because in netdev_tx_completed_queue we update the dql_completed + * before checking the XOFF flag. + */ + smp_mb(); + + /* check again in case another CPU has just made room avail */ + if (unlikely(dql_avail(&dev_queue->dql) >= 0)) + clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); #endif } @@ -1917,16 +1927,23 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, unsigned pkts, unsigned bytes) { #ifdef CONFIG_BQL - if (likely(bytes)) { - dql_completed(&dev_queue->dql, bytes); - if (unlikely(test_bit(__QUEUE_STATE_STACK_XOFF, - &dev_queue->state) && - dql_avail(&dev_queue->dql) >= 0)) { - if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF, - &dev_queue->state)) - netif_schedule_queue(dev_queue); - } - } + if (unlikely(!bytes)) + return; + + dql_completed(&dev_queue->dql, bytes); + + /* + * Without the memory barrier there is a small possiblity that + * netdev_tx_sent_queue will miss the update and cause the queue to + * be stopped forever + */ + smp_mb(); + + if (dql_avail(&dev_queue->dql) < 0) + return; + + if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state)) + netif_schedule_queue(dev_queue); #endif } -- cgit From cdbee74ce74cd66de42a6f6d0edfa74fa9a66f2a Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Fri, 16 Mar 2012 23:08:11 +0000 Subject: net: do not do gso for CHECKSUM_UNNECESSARY in netif_needs_gso This is related to fixing the bug of dropping FCoE frames when disabling tx ip checksum by 'ethtool -K ethx tx off'. The FCoE protocol stack driver would use CHECKSUM_UNNECESSARY on tx path instead of CHECKSUM_PARTIAL (as indicated in the 2/2 of this series). To do so, netif_needs_gso() has to be changed here to not do gso for both CHECKSUM_PARTIAL and CHECKSUM_UNNECESSARY. Ref. to original discussion thread: http://patchwork.ozlabs.org/patch/146567/ Signed-off-by: Yi Zou Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4535a4ea9760..261067b94559 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2657,7 +2657,8 @@ static inline bool netif_needs_gso(struct sk_buff *skb, netdev_features_t features) { return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || - unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); + unlikely((skb->ip_summed != CHECKSUM_PARTIAL) && + (skb->ip_summed != CHECKSUM_UNNECESSARY))); } static inline void netif_set_gso_max_size(struct net_device *dev, -- cgit